]> git.ipfire.org Git - people/ms/libloc.git/blobdiff - src/scripts/location-importer.in
importer: Merge the downloader into our main downloader
[people/ms/libloc.git] / src / scripts / location-importer.in
index a17e8bd7e83b99eccd18fd60bd89bded3a1090e2..5b6ffad8531285077ecb15b05cc3ed9423408f4e 100644 (file)
@@ -18,8 +18,8 @@
 ###############################################################################
 
 import argparse
-import concurrent.futures
 import csv
+import functools
 import http.client
 import ipaddress
 import json
@@ -33,7 +33,7 @@ import urllib.error
 # Load our location module
 import location
 import location.database
-import location.importer
+from location.downloader import Downloader
 from location.i18n import _
 
 # Initialise logging
@@ -47,6 +47,19 @@ VALID_ASN_RANGES = (
        (131072, 4199999999),
 )
 
+TRANSLATED_COUNTRIES = {
+       # When people say UK, they mean GB
+       "UK" : "GB",
+}
+
+IGNORED_COUNTRIES = set((
+       # Formerly Yugoslavia
+       "YU",
+
+       # Some people use ZZ to say "no country" or to hide the country
+       "ZZ",
+))
+
 # Configure the CSV parser for ARIN
 csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
 
@@ -90,6 +103,8 @@ class CLI(object):
 
                # Update WHOIS
                update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
+               update_whois.add_argument("sources", nargs="*",
+                       help=_("Only update these sources"))
                update_whois.set_defaults(func=self.handle_update_whois)
 
                # Update announcements
@@ -147,6 +162,9 @@ class CLI(object):
                # Parse command line arguments
                args = self.parse_cli()
 
+               # Initialize the downloader
+               self.downloader = Downloader()
+
                # Initialise database
                self.db = self._setup_database(args)
 
@@ -220,6 +238,7 @@ class CLI(object):
                                CREATE INDEX IF NOT EXISTS geofeed_networks_search
                                        ON geofeed_networks USING GIST(network inet_ops);
                                CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
+                               ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL DEFAULT '';
                                CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
                                        ON network_geofeeds(network);
                                CREATE INDEX IF NOT EXISTS network_geofeeds_search
@@ -266,8 +285,8 @@ class CLI(object):
                                );
                                CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
                                        ON autnum_overrides(number);
-                               ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
                                ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+                               ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;
 
                                CREATE TABLE IF NOT EXISTS network_overrides(
                                        network inet NOT NULL,
@@ -280,8 +299,8 @@ class CLI(object):
                                        ON network_overrides(network);
                                CREATE INDEX IF NOT EXISTS network_overrides_search
                                        ON network_overrides USING GIST(network inet_ops);
-                               ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
                                ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+                               ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;
                        """)
 
                return db
@@ -293,7 +312,7 @@ class CLI(object):
                # Fetch all valid country codes to check parsed networks aganist...
                countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
 
-               return [country.country_code for country in countries]
+               return set((country.country_code for country in countries))
 
        def handle_write(self, ns):
                """
@@ -673,175 +692,310 @@ class CLI(object):
                        writer.write(file)
 
        def handle_update_whois(self, ns):
-               downloader = location.importer.Downloader()
-
                # Did we run successfully?
-               error = False
+               success = True
+
+               sources = (
+                       # African Network Information Centre
+                       ("AFRINIC", (
+                               (self._import_standard_format, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
+                       )),
+
+                       # Asia Pacific Network Information Centre
+                       ("APNIC", (
+                               (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
+                               (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
+                               (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
+                               (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
+                       )),
+
+                       # American Registry for Internet Numbers
+                       ("ARIN", (
+                               (self._import_extended_format, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
+                               (self._import_arin_as_names,   "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
+                       )),
+
+                       # Japan Network Information Center
+                       ("JPNIC", (
+                               (self._import_standard_format, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
+                       )),
+
+                       # Latin America and Caribbean Network Information Centre
+                       ("LACNIC", (
+                               (self._import_standard_format, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
+                               (self._import_extended_format, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
+                       )),
+
+                       # Réseaux IP Européens
+                       ("RIPE", (
+                               (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
+                               (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
+                               (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
+                               (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
+                       )),
+               )
 
                # Fetch all valid country codes to check parsed networks against
-               validcountries = self.fetch_countries()
+               countries = self.fetch_countries()
+
+               # Check if we have countries
+               if not countries:
+                       log.error("Please import countries before importing any WHOIS data")
+                       return 1
 
                # Iterate over all potential sources
-               for source in sorted(location.importer.SOURCES):
-                       with self.db.transaction():
-                               # Create some temporary tables to store parsed data
-                               self.db.execute("""
-                                       CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
-                                               organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
-                                       CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
-
-                                       CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
-                                               name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
-                                       CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
-
-                                       CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
-                                               original_countries text[] NOT NULL, source text NOT NULL)
-                                               ON COMMIT DROP;
-                                       CREATE INDEX _rirdata_search ON _rirdata
-                                               USING BTREE(family(network), masklen(network));
-                                       CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
-                               """)
-
-                               # Remove all previously imported content
-                               self.db.execute("DELETE FROM autnums  WHERE source = %s", source)
-                               self.db.execute("DELETE FROM networks WHERE source = %s", source)
+               for name, feeds in sources:
+                       # Skip anything that should not be updated
+                       if ns.sources and not name in ns.sources:
+                               continue
 
-                               try:
-                                       # Fetch WHOIS sources
-                                       for url in location.importer.WHOIS_SOURCES.get(source, []):
-                                               for block in downloader.request_blocks(url):
-                                                       self._parse_block(block, source, validcountries)
-
-                                       # Fetch extended sources
-                                       for url in location.importer.EXTENDED_SOURCES.get(source, []):
-                                               for line in downloader.request_lines(url):
-                                                       self._parse_line(line, source, validcountries)
-                               except urllib.error.URLError as e:
-                                       log.error("Could not retrieve data from %s: %s" % (source, e))
-                                       error = True
-
-                                       # Continue with the next source
-                                       continue
+                       try:
+                               self._process_source(name, feeds, countries)
+
+                       # Log an error but continue if an exception occurs
+                       except Exception as e:
+                               log.error("Error processing source %s" % name, exc_info=True)
+                               success = False
+
+               # Return a non-zero exit code for errors
+               return 0 if success else 1
+
+       def _process_source(self, source, feeds, countries):
+               """
+                       This function processes one source
+               """
+               # Wrap everything into one large transaction
+               with self.db.transaction():
+                       # Remove all previously imported content
+                       self.db.execute("DELETE FROM autnums          WHERE source = %s", source)
+                       self.db.execute("DELETE FROM networks         WHERE source = %s", source)
+                       self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", source)
+
+                       # Create some temporary tables to store parsed data
+                       self.db.execute("""
+                               CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
+                                       organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+                               CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
+
+                               CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
+                                       name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+                               CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
+
+                               CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
+                                       original_countries text[] NOT NULL, source text NOT NULL)
+                                       ON COMMIT DROP;
+                               CREATE INDEX _rirdata_search ON _rirdata
+                                       USING BTREE(family(network), masklen(network));
+                               CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
+                       """)
+
+                       # Parse all feeds
+                       for callback, url, *args in feeds:
+                               # Retrieve the feed
+                               f = self.downloader.retrieve(url)
+
+                               # Call the callback
+                               callback(source, countries, f, *args)
+
+                       # Process all parsed networks from every RIR we happen to have access to,
+                       # insert the largest network chunks into the networks table immediately...
+                       families = self.db.query("""
+                               SELECT DISTINCT
+                                       family(network) AS family
+                               FROM
+                                       _rirdata
+                               ORDER BY
+                                       family(network)
+                               """,
+                       )
 
-                               # Process all parsed networks from every RIR we happen to have access to,
-                               # insert the largest network chunks into the networks table immediately...
-                               families = self.db.query("""
-                                       SELECT DISTINCT
-                                               family(network) AS family
+                       for family in (row.family for row in families):
+                               # Fetch the smallest mask length in our data set
+                               smallest = self.db.get("""
+                                       SELECT
+                                               MIN(
+                                                       masklen(network)
+                                               ) AS prefix
                                        FROM
                                                _rirdata
-                                       ORDER BY
-                                               family(network)
-                                       """,
+                                       WHERE
+                                               family(network) = %s
+                                       """, family,
                                )
 
-                               for family in (row.family for row in families):
-                                       # Fetch the smallest mask length in our data set
-                                       smallest = self.db.get("""
-                                               SELECT
-                                                       MIN(
-                                                               masklen(network)
-                                                       ) AS prefix
-                                               FROM
-                                                       _rirdata
-                                               WHERE
-                                                       family(network) = %s
-                                               """, family,
+                               # Copy all networks
+                               self.db.execute("""
+                                       INSERT INTO
+                                               networks
+                                       (
+                                               network,
+                                               country,
+                                               original_countries,
+                                               source
                                        )
+                                       SELECT
+                                               network,
+                                               country,
+                                               original_countries,
+                                               source
+                                       FROM
+                                               _rirdata
+                                       WHERE
+                                               masklen(network) = %s
+                                       AND
+                                               family(network) = %s
+                                       ON CONFLICT DO
+                                               NOTHING""",
+                                       smallest.prefix,
+                                       family,
+                               )
+
+                               # ... determine any other prefixes for this network family, ...
+                               prefixes = self.db.query("""
+                                       SELECT
+                                               DISTINCT masklen(network) AS prefix
+                                       FROM
+                                               _rirdata
+                                       WHERE
+                                               family(network) = %s
+                                       ORDER BY
+                                               masklen(network) ASC
+                                       OFFSET 1
+                                       """, family,
+                               )
 
-                                       # Copy all networks
+                               # ... and insert networks with this prefix in case they provide additional
+                               # information (i. e. subnet of a larger chunk with a different country)
+                               for prefix in (row.prefix for row in prefixes):
                                        self.db.execute("""
-                                               INSERT INTO
-                                                       networks
-                                               (
-                                                       network,
-                                                       country,
-                                                       original_countries,
-                                                       source
+                                               WITH candidates AS (
+                                                       SELECT
+                                                               _rirdata.network,
+                                                               _rirdata.country,
+                                                               _rirdata.original_countries,
+                                                               _rirdata.source
+                                                       FROM
+                                                               _rirdata
+                                                       WHERE
+                                                               family(_rirdata.network) = %s
+                                                       AND
+                                                               masklen(_rirdata.network) = %s
+                                               ),
+                                               filtered AS (
+                                                       SELECT
+                                                               DISTINCT ON (c.network)
+                                                               c.network,
+                                                               c.country,
+                                                               c.original_countries,
+                                                               c.source,
+                                                               masklen(networks.network),
+                                                               networks.country AS parent_country
+                                                       FROM
+                                                               candidates c
+                                                       LEFT JOIN
+                                                               networks
+                                                       ON
+                                                               c.network << networks.network
+                                                       ORDER BY
+                                                               c.network,
+                                                               masklen(networks.network) DESC NULLS LAST
                                                )
+                                               INSERT INTO
+                                                       networks(network, country, original_countries, source)
                                                SELECT
                                                        network,
                                                        country,
                                                        original_countries,
                                                        source
                                                FROM
-                                                       _rirdata
+                                                       filtered
                                                WHERE
-                                                       masklen(network) = %s
-                                               AND
-                                                       family(network) = %s
-                                               ON CONFLICT DO
-                                                       NOTHING""",
-                                               smallest.prefix,
-                                               family,
+                                                       parent_country IS NULL
+                                               OR
+                                                       country <> parent_country
+                                               ON CONFLICT DO NOTHING
+                                               """, family, prefix,
                                        )
 
-                                       # ... determine any other prefixes for this network family, ...
-                                       prefixes = self.db.query("""
-                                               SELECT
-                                                       DISTINCT masklen(network) AS prefix
-                                               FROM
-                                                       _rirdata
-                                               WHERE
-                                                       family(network) = %s
-                                               ORDER BY
-                                                       masklen(network) ASC
-                                               OFFSET 1
-                                               """, family,
-                                       )
+                       self.db.execute("""
+                               INSERT INTO
+                                       autnums
+                               (
+                                       number,
+                                       name,
+                                       source
+                               )
+                               SELECT
+                                       _autnums.number,
+                                       _organizations.name,
+                                       _organizations.source
+                               FROM
+                                       _autnums
+                               JOIN
+                                       _organizations ON _autnums.organization = _organizations.handle
+                               ON CONFLICT
+                               (
+                                       number
+                               )
+                               DO UPDATE
+                                       SET name = excluded.name
+                               """,
+                       )
 
-                                       # ... and insert networks with this prefix in case they provide additional
-                                       # information (i. e. subnet of a larger chunk with a different country)
-                                       for prefix in (row.prefix for row in prefixes):
-                                               self.db.execute("""
-                                                       WITH candidates AS (
-                                                               SELECT
-                                                                       _rirdata.network,
-                                                                       _rirdata.country,
-                                                                       _rirdata.original_countries,
-                                                                       _rirdata.source
-                                                               FROM
-                                                                       _rirdata
-                                                               WHERE
-                                                                       family(_rirdata.network) = %s
-                                                               AND
-                                                                       masklen(_rirdata.network) = %s
-                                                       ),
-                                                       filtered AS (
-                                                               SELECT
-                                                                       DISTINCT ON (c.network)
-                                                                       c.network,
-                                                                       c.country,
-                                                                       c.original_countries,
-                                                                       c.source,
-                                                                       masklen(networks.network),
-                                                                       networks.country AS parent_country
-                                                               FROM
-                                                                       candidates c
-                                                               LEFT JOIN
-                                                                       networks
-                                                               ON
-                                                                       c.network << networks.network
-                                                               ORDER BY
-                                                                       c.network,
-                                                                       masklen(networks.network) DESC NULLS LAST
-                                                       )
-                                                       INSERT INTO
-                                                               networks(network, country, original_countries, source)
-                                                       SELECT
-                                                               network,
-                                                               country,
-                                                               original_countries,
-                                                               source
-                                                       FROM
-                                                               filtered
-                                                       WHERE
-                                                               parent_country IS NULL
-                                                       OR
-                                                               country <> parent_country
-                                                       ON CONFLICT DO NOTHING
-                                                       """, family, prefix,
-                                               )
+       def _import_standard_format(self, source, countries, f, *args):
+               """
+                       Imports a single standard format source feed
+               """
+               # Iterate over all blocks
+               for block in iterate_over_blocks(f):
+                       self._parse_block(block, source, countries)
+
+       def _import_extended_format(self, source, countries, f, *args):
+               """
+                       Imports a single extended format source feed
+               """
+               # Iterate over all lines
+               for line in iterate_over_lines(f):
+                       self._parse_line(line, source, countries)
+
+       def _import_arin_as_names(self, source, countries, f, *args):
+               """
+                       Imports (technical) AS names from ARIN's CSV feed
+               """
+               # Walk through the file
+               for line in csv.DictReader(f, dialect="arin"):
+                       log.debug("Processing object: %s" % line)
+
+                       # Fetch status
+                       status = line.get("Status")
+
+                       # We are only interested in anything managed by ARIN
+                       if not status == "Full Registry Services":
+                               continue
+
+                       # Fetch organization name
+                       name = line.get("Org Name")
+
+                       # Extract ASNs
+                       first_asn = line.get("Start AS Number")
+                       last_asn  = line.get("End AS Number")
+
+                       # Cast to a number (missing fields raise TypeError,
+                       # non-numeric fields raise ValueError)
+                       try:
+                               first_asn = int(first_asn)
+                       except (TypeError, ValueError) as e:
+                               log.warning("Could not parse ASN '%s'" % first_asn)
+                               continue
+
+                       try:
+                               last_asn = int(last_asn)
+                       except (TypeError, ValueError) as e:
+                               log.warning("Could not parse ASN '%s'" % last_asn)
+                               continue
+
+                       # Check if the range is valid (an inverted range yields an
+                       # empty loop below, so nothing gets inserted)
+                       if last_asn < first_asn:
+                               log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
+
+                       # Insert everything into the database
+                       for asn in range(first_asn, last_asn + 1):
+                               if not self._check_parsed_asn(asn):
+                                       log.warning("Skipping invalid ASN %s" % asn)
+                                       continue
 
                                self.db.execute("""
                                        INSERT INTO
@@ -851,30 +1005,18 @@ class CLI(object):
                                                name,
                                                source
                                        )
-                                       SELECT
-                                               _autnums.number,
-                                               _organizations.name,
-                                               _organizations.source
-                                       FROM
-                                               _autnums
-                                       JOIN
-                                               _organizations ON _autnums.organization = _organizations.handle
+                                       VALUES
+                                       (
+                                               %s, %s, %s
+                                       )
                                        ON CONFLICT
                                        (
                                                number
                                        )
-                                       DO UPDATE
-                                               SET name = excluded.name
-                                       """,
+                                       DO NOTHING
+                                       """, asn, name, "ARIN",
                                )
 
-               # Download and import (technical) AS names from ARIN
-               with self.db.transaction():
-                       self._import_as_names_from_arin(downloader)
-
-               # Return a non-zero exit code for errors
-               return 1 if error else 0
-
        def _check_parsed_network(self, network):
                """
                        Assistive function to detect and subsequently sort out parsed
@@ -885,9 +1027,6 @@ class CLI(object):
                        (b) covering a too large chunk of the IP address space (prefix length
                                is < 7 for IPv4 networks, and < 10 for IPv6)
                        (c) "0.0.0.0" or "::" as a network address
-                       (d) are too small for being publicly announced (we have decided not to
-                               process them at the moment, as they significantly enlarge our
-                               database without providing very helpful additional information)
 
                        This unfortunately is necessary due to brain-dead clutter across
                        various RIR databases, causing mismatches and eventually disruptions.
@@ -895,45 +1034,36 @@ class CLI(object):
                        We will return False in case a network is not suitable for adding
                        it to our database, and True otherwise.
                """
+               # Check input
+               if isinstance(network, ipaddress.IPv6Network):
+                       pass
+               elif isinstance(network, ipaddress.IPv4Network):
+                       pass
+               else:
+                       raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
 
-               if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
-                       return False
-
+               # Ignore anything that isn't globally routable
                if not network.is_global:
                        log.debug("Skipping non-globally routable network: %s" % network)
                        return False
 
-               if network.version == 4:
-                       if network.prefixlen < 7:
-                               log.debug("Skipping too big IP chunk: %s" % network)
-                               return False
-
-                       if network.prefixlen > 24:
-                               log.debug("Skipping network too small to be publicly announced: %s" % network)
-                               return False
-
-                       if str(network.network_address) == "0.0.0.0":
-                               log.debug("Skipping network based on 0.0.0.0: %s" % network)
-                               return False
+               # Ignore anything that is unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
+               elif network.is_unspecified:
+                       log.debug("Skipping unspecified network: %s" % network)
+                       return False
 
-               elif network.version == 6:
+               # IPv6
+               if network.version == 6:
                        if network.prefixlen < 10:
                                log.debug("Skipping too big IP chunk: %s" % network)
                                return False
 
-                       if network.prefixlen > 48:
-                               log.debug("Skipping network too small to be publicly announced: %s" % network)
-                               return False
-
-                       if str(network.network_address) == "::":
-                               log.debug("Skipping network based on '::': %s" % network)
+               # IPv4
+               elif network.version == 4:
+                       if network.prefixlen < 7:
+                               log.debug("Skipping too big IP chunk: %s" % network)
                                return False
 
-               else:
-                       # This should not happen...
-                       log.warning("Skipping network of unknown family, this should not happen: %s" % network)
-                       return False
-
                # In case we have made it here, the network is considered to
                # be suitable for libloc consumption...
                return True
@@ -951,7 +1081,7 @@ class CLI(object):
                log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
                return False
 
-       def _parse_block(self, block, source_key, validcountries = None):
+       def _parse_block(self, block, source_key, countries):
                # Get first line to find out what type of block this is
                line = block[0]
 
@@ -961,7 +1091,7 @@ class CLI(object):
 
                # inetnum
                if line.startswith("inet6num:") or line.startswith("inetnum:"):
-                       return self._parse_inetnum_block(block, source_key, validcountries)
+                       return self._parse_inetnum_block(block, source_key, countries)
 
                # organisation
                elif line.startswith("organisation:"):
@@ -1014,13 +1144,9 @@ class CLI(object):
                        autnum.get("asn"), autnum.get("org"), source_key,
                )
 
-       def _parse_inetnum_block(self, block, source_key, validcountries = None):
-               log.debug("Parsing inetnum block:")
-
+       def _parse_inetnum_block(self, block, source_key, countries):
                inetnum = {}
                for line in block:
-                       log.debug(line)
-
                        # Split line
                        key, val = split_line(line)
 
@@ -1080,21 +1206,28 @@ class CLI(object):
                                inetnum[key] = [ipaddress.ip_network(val, strict=False)]
 
                        elif key == "country":
-                               val = val.upper()
+                               cc = val.upper()
 
-                               # Catch RIR data objects with more than one country code...
-                               if not key in inetnum:
-                                       inetnum[key] = []
-                               else:
-                                       if val in inetnum.get("country"):
-                                               # ... but keep this list distinct...
-                                               continue
+                               # Ignore certain country codes
+                               if cc in IGNORED_COUNTRIES:
+                                       log.debug("Ignoring country code '%s'" % cc)
+                                       continue
+
+                               # Translate country codes
+                               try:
+                                       cc = TRANSLATED_COUNTRIES[cc]
+                               except KeyError:
+                                       pass
 
-                               # When people set country codes to "UK", they actually mean "GB"
-                               if val == "UK":
-                                       val = "GB"
+                               # Do we know this country?
+                               if not cc in countries:
+                                       log.warning("Skipping invalid country code '%s'" % cc)
+                                       continue
 
-                               inetnum[key].append(val)
+                               try:
+                                       inetnum[key].append(cc)
+                               except KeyError:
+                                       inetnum[key] = [cc]
 
                        # Parse the geofeed attribute
                        elif key == "geofeed":
@@ -1107,39 +1240,47 @@ class CLI(object):
                                        inetnum["geofeed"] = m.group(1)
 
                # Skip empty objects
-               if not inetnum or not "country" in inetnum:
+               if not inetnum:
                        return
 
-               # Prepare skipping objects with unknown country codes...
-               invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
-
                # Iterate through all networks enumerated from above, check them for plausibility and insert
                # them into the database, if _check_parsed_network() succeeded
                for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
-                       if self._check_parsed_network(single_network):
-                               # Skip objects with unknown country codes if they are valid to avoid log spam...
-                               if validcountries and invalidcountries:
-                                       log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
-                                               (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
-                                       break
+                       if not self._check_parsed_network(single_network):
+                               continue
 
-                               # Everything is fine here, run INSERT statement...
-                               self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
-                                       VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
-                                       "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
+                       # Fetch the countries or use a list with an empty country
+                       countries = inetnum.get("country", [None])
+
+                       # Insert the network into the database but only use the first country code
+                       for cc in countries:
+                               self.db.execute("""
+                                       INSERT INTO
+                                               _rirdata
+                                       (
+                                               network,
+                                               country,
+                                               original_countries,
+                                               source
+                                       )
+                                       VALUES
+                                       (
+                                               %s, %s, %s, %s
+                                       )
+                                       ON CONFLICT (network)
+                                               DO UPDATE SET country = excluded.country
+                                       """, "%s" % single_network, cc, [cc for cc in countries if cc], source_key,
                                )
 
-                               # Update any geofeed information
-                               geofeed = inetnum.get("geofeed", None)
-                               if geofeed:
-                                       self._parse_geofeed(geofeed, single_network)
+                               # If there are more than one country, we will only use the first one
+                               break
 
-                               # Delete any previous geofeeds
-                               else:
-                                       self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
-                                               "%s" % single_network)
+                       # Update any geofeed information
+                       geofeed = inetnum.get("geofeed", None)
+                       if geofeed:
+                               self._parse_geofeed(source_key, geofeed, single_network)
 
-       def _parse_geofeed(self, url, single_network):
+       def _parse_geofeed(self, source, url, single_network):
                # Parse the URL
                url = urllib.parse.urlparse(url)
 
@@ -1154,16 +1295,19 @@ class CLI(object):
                # Store/update any geofeeds
                self.db.execute("""
                        INSERT INTO
-                               network_geofeeds(
-                                       network,
-                                       url
-                               )
-                       VALUES(
-                               %s, %s
+                               network_geofeeds
+                       (
+                               network,
+                               url,
+                               source
+                       )
+                       VALUES
+                       (
+                               %s, %s, %s
                        )
                        ON CONFLICT (network) DO
                                UPDATE SET url = excluded.url""",
-                       "%s" % single_network, url,
+                       "%s" % single_network, url, source,
                )
 
        def _parse_org_block(self, block, source_key):
@@ -1187,7 +1331,7 @@ class CLI(object):
                        org.get("organisation"), org.get("org-name"), source_key,
                )
 
-       def _parse_line(self, line, source_key, validcountries = None):
+       def _parse_line(self, line, source_key, validcountries=None):
                # Skip version line
                if line.startswith("2"):
                        return
@@ -1202,6 +1346,11 @@ class CLI(object):
                        log.warning("Could not parse line: %s" % line)
                        return
 
+               # Skip any unknown protocols
+               if not type in ("ipv6", "ipv4"):
+                       log.warning("Unknown IP protocol '%s'" % type)
+                       return
+
                # Skip any lines that are for stats only or do not have a country
                # code at all (avoids log spam below)
                if not country_code or country_code == '*':
@@ -1213,10 +1362,6 @@ class CLI(object):
                                (country_code, line))
                        return
 
-               if type in ("ipv6", "ipv4"):
-                       return self._parse_ip_line(country_code, type, line, source_key)
-
-       def _parse_ip_line(self, country, type, line, source_key):
                try:
                        address, prefix, date, status, organization = line.split("|")
                except ValueError:
@@ -1254,80 +1399,24 @@ class CLI(object):
                if not self._check_parsed_network(network):
                        return
 
-               self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
-                       VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
-                       UPDATE SET country = excluded.country",
-                       "%s" % network, country, [country], source_key,
+               self.db.execute("""
+                       INSERT INTO
+                               networks
+                       (
+                               network,
+                               country,
+                               original_countries,
+                               source
+                       )
+                       VALUES
+                       (
+                               %s, %s, %s, %s
+                       )
+                       ON CONFLICT (network)
+                               DO UPDATE SET country = excluded.country
+                       """, "%s" % network, country_code, [country_code], source_key,
                )
 
-       def _import_as_names_from_arin(self, downloader):
-               # Delete all previously imported content
-               self.db.execute("DELETE FROM autnums  WHERE source = %s", "ARIN")
-
-               # Try to retrieve the feed from ftp.arin.net
-               feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
-
-               # Walk through the file
-               for line in csv.DictReader(feed, dialect="arin"):
-                       log.debug("Processing object: %s" % line)
-
-                       # Fetch status
-                       status = line.get("Status")
-
-                       # We are only interested in anything managed by ARIN
-                       if not status == "Full Registry Services":
-                               continue
-
-                       # Fetch organization name
-                       name = line.get("Org Name")
-
-                       # Extract ASNs
-                       first_asn = line.get("Start AS Number")
-                       last_asn  = line.get("End AS Number")
-
-                       # Cast to a number
-                       try:
-                               first_asn = int(first_asn)
-                       except TypeError as e:
-                               log.warning("Could not parse ASN '%s'" % first_asn)
-                               continue
-
-                       try:
-                               last_asn = int(last_asn)
-                       except TypeError as e:
-                               log.warning("Could not parse ASN '%s'" % last_asn)
-                               continue
-
-                       # Check if the range is valid
-                       if last_asn < first_asn:
-                               log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
-
-                       # Insert everything into the database
-                       for asn in range(first_asn, last_asn + 1):
-                               if not self._check_parsed_asn(asn):
-                                       log.warning("Skipping invalid ASN %s" % asn)
-                                       continue
-
-                               self.db.execute("""
-                                       INSERT INTO
-                                               autnums
-                                       (
-                                               number,
-                                               name,
-                                               source
-                                       )
-                                       VALUES
-                                       (
-                                               %s, %s, %s
-                                       )
-                                       ON CONFLICT
-                                       (
-                                               number
-                                       )
-                                       DO NOTHING
-                                       """, asn, name, "ARIN",
-                               )
-
        def handle_update_announcements(self, ns):
                server = ns.server[0]
 
@@ -1558,12 +1647,10 @@ class CLI(object):
                                id
                """)
 
-               with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-                       results = executor.map(self._fetch_geofeed, geofeeds)
-
-                       # Fetch all results to raise any exceptions
-                       for result in results:
-                               pass
+               # Update all geofeeds
+               for geofeed in geofeeds:
+                       with self.db.transaction():
+                               self._fetch_geofeed(geofeed)
 
                # Delete data from any feeds that did not update in the last two weeks
                with self.db.transaction():
@@ -1589,99 +1676,96 @@ class CLI(object):
                with self.db.transaction():
                        # Open the URL
                        try:
-                               req = urllib.request.Request(geofeed.url, headers={
+                               # Send the request
+                               f = self.downloader.retrieve(geofeed.url, headers={
                                        "User-Agent" : "location/%s" % location.__version__,
 
                                        # We expect some plain text file in CSV format
-                                       "Accept" : "text/csv, text/plain",
+                                       "Accept"     : "text/csv, text/plain",
                                })
 
-                               # XXX set proxy
+                               # Remove any previous data
+                               self.db.execute("DELETE FROM geofeed_networks \
+                                       WHERE geofeed_id = %s", geofeed.id)
 
-                               # Send the request
-                               with urllib.request.urlopen(req, timeout=10) as f:
-                                       # Remove any previous data
-                                       self.db.execute("DELETE FROM geofeed_networks \
-                                               WHERE geofeed_id = %s", geofeed.id)
+                               lineno = 0
 
-                                       lineno = 0
+                               # Read the output line by line
+                               for line in f:
+                                       lineno += 1
 
-                                       # Read the output line by line
-                                       for line in f:
-                                               lineno += 1
+                                       try:
+                                               line = line.decode()
 
-                                               try:
-                                                       line = line.decode()
+                                       # Ignore any lines we cannot decode
+                                       except UnicodeDecodeError:
+                                               log.debug("Could not decode line %s in %s" \
+                                                       % (lineno, geofeed.url))
+                                               continue
 
-                                               # Ignore any lines we cannot decode
-                                               except UnicodeDecodeError:
-                                                       log.debug("Could not decode line %s in %s" \
-                                                               % (lineno, geofeed.url))
-                                                       continue
+                                       # Strip any newline
+                                       line = line.rstrip()
 
-                                               # Strip any newline
-                                               line = line.rstrip()
+                                       # Skip empty lines
+                                       if not line:
+                                               continue
 
-                                               # Skip empty lines
-                                               if not line:
-                                                       continue
+                                       # Try to parse the line
+                                       try:
+                                               fields = line.split(",", 5)
+                                       except ValueError:
+                                               log.debug("Could not parse line: %s" % line)
+                                               continue
 
-                                               # Try to parse the line
-                                               try:
-                                                       fields = line.split(",", 5)
-                                               except ValueError:
-                                                       log.debug("Could not parse line: %s" % line)
-                                                       continue
+                                       # Check if we have enough fields
+                                       if len(fields) < 4:
+                                               log.debug("Not enough fields in line: %s" % line)
+                                               continue
 
-                                               # Check if we have enough fields
-                                               if len(fields) < 4:
-                                                       log.debug("Not enough fields in line: %s" % line)
-                                                       continue
+                                       # Fetch all fields
+                                       network, country, region, city, = fields[:4]
 
-                                               # Fetch all fields
-                                               network, country, region, city, = fields[:4]
+                                       # Try to parse the network
+                                       try:
+                                               network = ipaddress.ip_network(network, strict=False)
+                                       except ValueError:
+                                               log.debug("Could not parse network: %s" % network)
+                                               continue
 
-                                               # Try to parse the network
-                                               try:
-                                                       network = ipaddress.ip_network(network, strict=False)
-                                               except ValueError:
-                                                       log.debug("Could not parse network: %s" % network)
-                                                       continue
-
-                                               # Strip any excess whitespace from country codes
-                                               country = country.strip()
-
-                                               # Make the country code uppercase
-                                               country = country.upper()
-
-                                               # Check the country code
-                                               if not country:
-                                                       log.debug("Empty country code in Geofeed %s line %s" \
-                                                               % (geofeed.url, lineno))
-                                                       continue
-
-                                               elif not location.country_code_is_valid(country):
-                                                       log.debug("Invalid country code in Geofeed %s:%s: %s" \
-                                                               % (geofeed.url, lineno, country))
-                                                       continue
-
-                                               # Write this into the database
-                                               self.db.execute("""
-                                                       INSERT INTO
-                                                               geofeed_networks (
-                                                                       geofeed_id,
-                                                                       network,
-                                                                       country,
-                                                                       region,
-                                                                       city
-                                                               )
-                                                       VALUES (%s, %s, %s, %s, %s)""",
-                                                       geofeed.id,
-                                                       "%s" % network,
-                                                       country,
-                                                       region,
-                                                       city,
-                                               )
+                                       # Strip any excess whitespace from country codes
+                                       country = country.strip()
+
+                                       # Make the country code uppercase
+                                       country = country.upper()
+
+                                       # Check the country code
+                                       if not country:
+                                               log.debug("Empty country code in Geofeed %s line %s" \
+                                                       % (geofeed.url, lineno))
+                                               continue
+
+                                       elif not location.country_code_is_valid(country):
+                                               log.debug("Invalid country code in Geofeed %s:%s: %s" \
+                                                       % (geofeed.url, lineno, country))
+                                               continue
+
+                                       # Write this into the database
+                                       self.db.execute("""
+                                               INSERT INTO
+                                                       geofeed_networks (
+                                                               geofeed_id,
+                                                               network,
+                                                               country,
+                                                               region,
+                                                               city
+                                                       )
+                                               VALUES (%s, %s, %s, %s, %s)""",
+                                               geofeed.id,
+                                               "%s" % network,
+                                               country,
+                                               region,
+                                               city,
+                                       )
 
                        # Catch any HTTP errors
                        except urllib.request.HTTPError as e:
@@ -1716,18 +1800,15 @@ class CLI(object):
 
        def handle_update_overrides(self, ns):
                with self.db.transaction():
-                       # Only drop manually created overrides, as we can be reasonably sure to have them,
-                       # and preserve the rest. If appropriate, it is deleted by correspondent functions.
-                       self.db.execute("""
-                               DELETE FROM autnum_overrides WHERE source = 'manual';
-                               DELETE FROM network_overrides WHERE source = 'manual';
-                       """)
+                       # Drop any previous content
+                       self.db.execute("TRUNCATE TABLE autnum_overrides")
+                       self.db.execute("TRUNCATE TABLE network_overrides")
 
                        for file in ns.files:
                                log.info("Reading %s..." % file)
 
                                with open(file, "rb") as f:
-                                       for type, block in location.importer.read_blocks(f):
+                                       for type, block in read_blocks(f):
                                                if type == "net":
                                                        network = block.get("net")
                                                        # Try to parse and normalise the network
@@ -1743,19 +1824,24 @@ class CLI(object):
                                                                continue
 
                                                        self.db.execute("""
-                                                               INSERT INTO network_overrides(
+                                                               INSERT INTO
+                                                                       network_overrides
+                                                               (
                                                                        network,
                                                                        country,
-                                                                       source,
                                                                        is_anonymous_proxy,
                                                                        is_satellite_provider,
                                                                        is_anycast,
                                                                        is_drop
-                                                               ) VALUES (%s, %s, %s, %s, %s, %s, %s)
-                                                               ON CONFLICT (network) DO NOTHING""",
+                                                               )
+                                                               VALUES
+                                                               (
+                                                                       %s, %s, %s, %s, %s, %s
+                                                               )
+                                                               ON CONFLICT (network) DO NOTHING
+                                                               """,
                                                                "%s" % network,
                                                                block.get("country"),
-                                                               "manual",
                                                                self._parse_bool(block, "is-anonymous-proxy"),
                                                                self._parse_bool(block, "is-satellite-provider"),
                                                                self._parse_bool(block, "is-anycast"),
@@ -1774,21 +1860,26 @@ class CLI(object):
                                                        autnum = autnum[2:]
 
                                                        self.db.execute("""
-                                                               INSERT INTO autnum_overrides(
+                                                               INSERT INTO
+                                                                       autnum_overrides
+                                                               (
                                                                        number,
                                                                        name,
                                                                        country,
-                                                                       source,
                                                                        is_anonymous_proxy,
                                                                        is_satellite_provider,
                                                                        is_anycast,
                                                                        is_drop
-                                                               ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
-                                                               ON CONFLICT DO NOTHING""",
+                                                               )
+                                                               VALUES
+                                                               (
+                                                                       %s, %s, %s, %s, %s, %s, %s
+                                                               )
+                                                               ON CONFLICT (number) DO NOTHING
+                                                               """,
                                                                autnum,
                                                                block.get("name"),
                                                                block.get("country"),
-                                                               "manual",
                                                                self._parse_bool(block, "is-anonymous-proxy"),
                                                                self._parse_bool(block, "is-satellite-provider"),
                                                                self._parse_bool(block, "is-anycast"),
@@ -1804,9 +1895,6 @@ class CLI(object):
                """
                success = True
 
-               # Create a downloader
-               downloader = location.importer.Downloader()
-
                feeds = (
                        # AWS IP Ranges
                        ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
@@ -1815,8 +1903,19 @@ class CLI(object):
                        ("SPAMHAUS-DROP",   self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
                        ("SPAMHAUS-EDROP",  self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
                        ("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),
+
+                       # Spamhaus ASNDROP
+                       ("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
                )
 
+               # Drop any data from feeds that we don't support (any more)
+               with self.db.transaction():
+                       # Fetch the names of all feeds we support
+                       sources = [name for name, *rest in feeds]
+
+                       self.db.execute("DELETE FROM autnum_feeds  WHERE NOT source = ANY(%s)", sources)
+                       self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)
+
                # Walk through all feeds
                for name, callback, url, *args in feeds:
                        # Skip any feeds that were not requested on the command line
@@ -1824,25 +1923,22 @@ class CLI(object):
                                continue
 
                        try:
-                               self._process_feed(downloader, name, callback, url, *args)
+                               self._process_feed(name, callback, url, *args)
 
                        # Log an error but continue if an exception occurs
                        except Exception as e:
                                log.error("Error processing feed '%s': %s" % (name, e))
                                success = False
 
-               # Spamhaus
-               #self._update_feed_for_spamhaus_drop()
-
                # Return status
                return 0 if success else 1
 
-       def _process_feed(self, downloader, name, callback, url, *args):
+       def _process_feed(self, name, callback, url, *args):
                """
                        Processes one feed
                """
                # Open the URL
-               f = downloader.retrieve(url)
+               f = self.downloader.retrieve(url)
 
                with self.db.transaction():
                        # Drop any previous content
@@ -2030,78 +2126,54 @@ class CLI(object):
                if not lines:
                        raise RuntimeError("Received bogus feed %s with no data" % name)
 
-       def _update_feed_for_spamhaus_drop(self):
-               downloader = location.importer.Downloader()
-
-               asn_lists = [
-                                       ("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json")
-                               ]
-
-               for name, url in asn_lists:
-                       # Fetch URL
-                       f = downloader.retrieve(url)
-
-                       # Split into lines
-                       fcontent = f.readlines()
-
-                       with self.db.transaction():
-                               # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
-                               # downloads.
-                               if len(fcontent) > 10:
-                                       self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
-                               else:
-                                       log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
-                                       continue
+       def _import_spamhaus_asndrop(self, name, f):
+               """
+                       Import Spamhaus ASNDROP feed
+               """
+               for line in f:
+                       # Decode the line
+                       line = line.decode("utf-8")
 
-                               # Iterate through every line, filter comments and add remaining ASNs to
-                               # the override table in case they are valid...
-                               for sline in fcontent:
-                                       # The response is assumed to be encoded in UTF-8...
-                                       sline = sline.decode("utf-8")
+                       # Parse JSON
+                       try:
+                               line = json.loads(line)
+                       except json.JSONDecodeError as e:
+                               log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
+                               continue
 
-                                       # Load every line as a JSON object and try to obtain an ASN from it...
-                                       try:
-                                               lineobj = json.loads(sline)
-                                       except json.decoder.JSONDecodeError:
-                                               log.error("Unable to parse line as a JSON object: %s" % sline)
-                                               continue
+                       # Fetch type
+                       type = line.get("type")
 
-                                       # Skip line contiaining file metadata
-                                       try:
-                                               type = lineobj["type"]
+                       # Skip any metadata
+                       if type == "metadata":
+                               continue
 
-                                               if type == "metadata":
-                                                       continue
-                                       except KeyError:
-                                               pass
+                       # Fetch ASN
+                       asn  = line.get("asn")
 
-                                       try:
-                                               asn = lineobj["asn"]
-                                               as_name = lineobj["asname"]
-                                       except KeyError:
-                                               log.warning("Unable to extract necessary information from line: %s" % sline)
-                                               continue
+                       # Skip any lines without an ASN
+                       if not asn:
+                               continue
 
-                                       # Filter invalid ASNs...
-                                       if not self._check_parsed_asn(asn):
-                                               log.warning("Skipping bogus ASN found in %s (%s): %s" % \
-                                                       (name, url, asn))
-                                               continue
+                       # Filter invalid ASNs
+                       if not self._check_parsed_asn(asn):
+                               log.warning("%s: Skipping bogus ASN %s" % (name, asn))
+                               continue
 
-                                       # Conduct SQL statement...
-                                       self.db.execute("""
-                                               INSERT INTO
-                                                       autnum_feeds
-                                               (
-                                                       number,
-                                                       source,
-                                                       is_drop
-                                               )
-                                               VALUES
-                                               (
-                                                       %s, %s, %s
-                                               )""", "%s" % asn, name, True,
-                                       )
+                       # Write to database
+                       self.db.execute("""
+                               INSERT INTO
+                                       autnum_feeds
+                               (
+                                       number,
+                                       source,
+                                       is_drop
+                               )
+                               VALUES
+                               (
+                                       %s, %s, %s
+                               )""", "%s" % asn, name, True,
+                       )
 
        @staticmethod
        def _parse_bool(block, key):
@@ -2157,6 +2229,74 @@ def split_line(line):
 
        return key, val
 
def read_blocks(f):
	"""
		Parses the given file into blocks of "key: value" lines and
		yields a (type, data) tuple for each block, where type is the
		key of the block's very first line and data is a dictionary of
		all keys/values (later keys overwrite earlier ones).
	"""
	for lines in iterate_over_blocks(f):
		# Split every line into its key and value
		pairs = [line.split(":", 1) for line in lines]

		# The key of the first line determines the type of the block
		type = pairs[0][0] if pairs else None

		# Collect all keys and their (stripped) values
		data = { key : value.strip() for key, value in pairs }

		yield type, data
+
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
	"""
		Reads the file line by line and yields lists of consecutive
		non-empty lines ("blocks") that are separated by empty lines.

		Lines that are entirely commented out (starting with # or %)
		are skipped, and any trailing comment is cut off.
	"""
	collected = []

	for raw in f:
		# Ignore lines that are entirely commented out
		if raw.startswith(b"#") or raw.startswith(b"%"):
			continue

		# Decode using the first charset that succeeds
		line = raw
		for charset in charsets:
			try:
				line = raw.decode(charset)
			except UnicodeDecodeError:
				continue
			else:
				break

		# Cut off any comment at the end of the line
		line, _, comment = line.partition("#")

		# Drop any trailing whitespace
		line = line.rstrip()

		# Skip lines that only carried a comment
		if comment and not line:
			continue

		if line:
			# A non-empty line belongs to the current block
			collected.append(line)
		elif collected:
			# An empty line terminates the current block
			yield collected
			collected = []

	# Yield whatever is left over at the end of the file
	if collected:
		yield collected
+
def iterate_over_lines(f):
	"""
		Decodes each line of the file as UTF-8 and yields it without
		any trailing whitespace.
	"""
	yield from (line.decode().rstrip() for line in f)
+
 def main():
        # Run the command line interface
        c = CLI()