###############################################################################
import argparse
-import concurrent.futures
import csv
+import functools
import http.client
import ipaddress
import json
# Load our location module
import location
import location.database
-import location.importer
+from location.downloader import Downloader
from location.i18n import _
# Initialise logging
(131072, 4199999999),
)
+TRANSLATED_COUNTRIES = {
+ # When people say UK, they mean GB
+ "UK" : "GB",
+}
+
+IGNORED_COUNTRIES = set((
+ # Formerly Yugoslavia
+ "YU",
+
+ # Some people use ZZ to say "no country" or to hide the country
+ "ZZ",
+))
+
# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
# Update WHOIS
update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
+ update_whois.add_argument("sources", nargs="*",
+ help=_("Only update these sources"))
update_whois.set_defaults(func=self.handle_update_whois)
# Update announcements
help=_("Update Geofeeds"))
update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
+ # Update feeds
+ update_feeds = subparsers.add_parser("update-feeds",
+ help=_("Update Feeds"))
+ update_feeds.add_argument("feeds", nargs="*",
+ help=_("Only update these feeds"))
+ update_feeds.set_defaults(func=self.handle_update_feeds)
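+		# e.g. "update-feeds SPAMHAUS-DROP" updates only the Spamhaus DROP feed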
+
# Update overrides
update_overrides = subparsers.add_parser("update-overrides",
help=_("Update overrides"),
# Parse command line arguments
args = self.parse_cli()
+ # Initialize the downloader
+ self.downloader = Downloader()
+
# Initialise database
self.db = self._setup_database(args)
CREATE INDEX IF NOT EXISTS geofeed_networks_search
ON geofeed_networks USING GIST(network inet_ops);
CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
+ ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL;
CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
ON network_geofeeds(network);
CREATE INDEX IF NOT EXISTS network_geofeeds_search
);
CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
ON autnum_overrides(number);
- ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+ ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;
CREATE TABLE IF NOT EXISTS network_overrides(
network inet NOT NULL,
ON network_overrides(network);
CREATE INDEX IF NOT EXISTS network_overrides_search
ON network_overrides USING GIST(network inet_ops);
- ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+ ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;
""")
return db
+ def fetch_countries(self):
+ """
+		Returns a set of all country codes known to the database
+		"""
+		# Fetch all valid country codes to check parsed networks against...
+ countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
+
+		return set(country.country_code for country in countries)
+
def handle_write(self, ns):
"""
Compiles a database in libloc format out of what is in the database
writer.write(file)
def handle_update_whois(self, ns):
- downloader = location.importer.Downloader()
-
# Did we run successfully?
- error = False
+ success = True
+
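+		# All WHOIS sources that we import: each registry maps to a list of
+		# (callback, URL) pairs which are downloaded and parsed in order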
+ sources = (
+ # African Network Information Centre
+ ("AFRINIC", (
+ (self._import_standard_format, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
+ )),
+
+ # Asia Pacific Network Information Centre
+ ("APNIC", (
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
+ )),
+
+ # American Registry for Internet Numbers
+ ("ARIN", (
+ (self._import_extended_format, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
+ (self._import_arin_as_names, "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
+ )),
+
+ # Japan Network Information Center
+ ("JPNIC", (
+ (self._import_standard_format, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
+ )),
+
+ # Latin America and Caribbean Network Information Centre
+ ("LACNIC", (
+ (self._import_standard_format, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
+ (self._import_extended_format, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
+ )),
+
+ # Réseaux IP Européens
+ ("RIPE", (
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
+ )),
+ )
# Fetch all valid country codes to check parsed networks against
- validcountries = self.countries
+ countries = self.fetch_countries()
+
+ # Check if we have countries
+ if not countries:
+ log.error("Please import countries before importing any WHOIS data")
+ return 1
# Iterate over all potential sources
- for source in sorted(location.importer.SOURCES):
- with self.db.transaction():
- # Create some temporary tables to store parsed data
- self.db.execute("""
- CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
- organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
- CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
-
- CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
- name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
- CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
-
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
- original_countries text[] NOT NULL, source text NOT NULL)
- ON COMMIT DROP;
- CREATE INDEX _rirdata_search ON _rirdata
- USING BTREE(family(network), masklen(network));
- CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
- """)
-
- # Remove all previously imported content
- self.db.execute("DELETE FROM autnums WHERE source = %s", source)
- self.db.execute("DELETE FROM networks WHERE source = %s", source)
+ for name, feeds in sources:
+ # Skip anything that should not be updated
+			if ns.sources and name not in ns.sources:
+ continue
- try:
- # Fetch WHOIS sources
- for url in location.importer.WHOIS_SOURCES.get(source, []):
- for block in downloader.request_blocks(url):
- self._parse_block(block, source, validcountries)
-
- # Fetch extended sources
- for url in location.importer.EXTENDED_SOURCES.get(source, []):
- for line in downloader.request_lines(url):
- self._parse_line(line, source, validcountries)
- except urllib.error.URLError as e:
- log.error("Could not retrieve data from %s: %s" % (source, e))
- error = True
-
- # Continue with the next source
- continue
+ try:
+ self._process_source(name, feeds, countries)
+
+ # Log an error but continue if an exception occurs
+ except Exception as e:
+ log.error("Error processing source %s" % name, exc_info=True)
+ success = False
+
+ # Return a non-zero exit code for errors
+ return 0 if success else 1
+
+ def _process_source(self, source, feeds, countries):
+ """
+ This function processes one source
+ """
+ # Wrap everything into one large transaction
+ with self.db.transaction():
+ # Remove all previously imported content
+ self.db.execute("DELETE FROM autnums WHERE source = %s", source)
+ self.db.execute("DELETE FROM networks WHERE source = %s", source)
+ self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", source)
+
+ # Create some temporary tables to store parsed data
+ self.db.execute("""
+ CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
+ organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+ CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
+
+ CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
+ name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+ CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
+
+ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
+ original_countries text[] NOT NULL, source text NOT NULL)
+ ON COMMIT DROP;
+ CREATE INDEX _rirdata_search ON _rirdata
+ USING BTREE(family(network), masklen(network));
+ CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
+ """)
- # Process all parsed networks from every RIR we happen to have access to,
- # insert the largest network chunks into the networks table immediately...
- families = self.db.query("""
- SELECT DISTINCT
- family(network) AS family
+ # Parse all feeds
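+			# Each feed is a (callback, URL, ...) tuple; the callback receives
+			# the source name, the valid country codes and the downloaded file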
+ for callback, url, *args in feeds:
+ # Retrieve the feed
+ f = self.downloader.retrieve(url)
+
+ # Call the callback
+ callback(source, countries, f, *args)
+
+ # Process all parsed networks from every RIR we happen to have access to,
+ # insert the largest network chunks into the networks table immediately...
+ families = self.db.query("""
+ SELECT DISTINCT
+ family(network) AS family
+ FROM
+ _rirdata
+ ORDER BY
+ family(network)
+ """,
+ )
+
+ for family in (row.family for row in families):
+ # Fetch the smallest mask length in our data set
+ smallest = self.db.get("""
+ SELECT
+ MIN(
+ masklen(network)
+ ) AS prefix
FROM
_rirdata
- ORDER BY
- family(network)
- """,
+ WHERE
+ family(network) = %s
+ """, family,
)
- for family in (row.family for row in families):
- # Fetch the smallest mask length in our data set
- smallest = self.db.get("""
- SELECT
- MIN(
- masklen(network)
- ) AS prefix
- FROM
- _rirdata
- WHERE
- family(network) = %s
- """, family,
+ # Copy all networks
+ self.db.execute("""
+ INSERT INTO
+ networks
+ (
+ network,
+ country,
+ original_countries,
+ source
)
+ SELECT
+ network,
+ country,
+ original_countries,
+ source
+ FROM
+ _rirdata
+ WHERE
+ masklen(network) = %s
+ AND
+ family(network) = %s
+ ON CONFLICT DO
+ NOTHING""",
+ smallest.prefix,
+ family,
+ )
- # Copy all networks
+ # ... determine any other prefixes for this network family, ...
+ prefixes = self.db.query("""
+ SELECT
+ DISTINCT masklen(network) AS prefix
+ FROM
+ _rirdata
+ WHERE
+ family(network) = %s
+ ORDER BY
+ masklen(network) ASC
+ OFFSET 1
+ """, family,
+ )
+
+ # ... and insert networks with this prefix in case they provide additional
+ # information (i. e. subnet of a larger chunk with a different country)
+ for prefix in (row.prefix for row in prefixes):
self.db.execute("""
- INSERT INTO
- networks
- (
- network,
- country,
- original_countries,
- source
+ WITH candidates AS (
+ SELECT
+ _rirdata.network,
+ _rirdata.country,
+ _rirdata.original_countries,
+ _rirdata.source
+ FROM
+ _rirdata
+ WHERE
+ family(_rirdata.network) = %s
+ AND
+ masklen(_rirdata.network) = %s
+ ),
+ filtered AS (
+ SELECT
+ DISTINCT ON (c.network)
+ c.network,
+ c.country,
+ c.original_countries,
+ c.source,
+ masklen(networks.network),
+ networks.country AS parent_country
+ FROM
+ candidates c
+ LEFT JOIN
+ networks
+ ON
+ c.network << networks.network
+ ORDER BY
+ c.network,
+ masklen(networks.network) DESC NULLS LAST
)
+ INSERT INTO
+ networks(network, country, original_countries, source)
SELECT
network,
country,
original_countries,
source
FROM
- _rirdata
+ filtered
WHERE
- masklen(network) = %s
- AND
- family(network) = %s
- ON CONFLICT DO
- NOTHING""",
- smallest.prefix,
- family,
+ parent_country IS NULL
+ OR
+ country <> parent_country
+ ON CONFLICT DO NOTHING
+ """, family, prefix,
)
- # ... determine any other prefixes for this network family, ...
- prefixes = self.db.query("""
- SELECT
- DISTINCT masklen(network) AS prefix
- FROM
- _rirdata
- WHERE
- family(network) = %s
- ORDER BY
- masklen(network) ASC
- OFFSET 1
- """, family,
- )
+ self.db.execute("""
+ INSERT INTO
+ autnums
+ (
+ number,
+ name,
+ source
+ )
+ SELECT
+ _autnums.number,
+ _organizations.name,
+ _organizations.source
+ FROM
+ _autnums
+ JOIN
+ _organizations ON _autnums.organization = _organizations.handle
+ ON CONFLICT
+ (
+ number
+ )
+ DO UPDATE
+ SET name = excluded.name
+ """,
+ )
- # ... and insert networks with this prefix in case they provide additional
- # information (i. e. subnet of a larger chunk with a different country)
- for prefix in (row.prefix for row in prefixes):
- self.db.execute("""
- WITH candidates AS (
- SELECT
- _rirdata.network,
- _rirdata.country,
- _rirdata.original_countries,
- _rirdata.source
- FROM
- _rirdata
- WHERE
- family(_rirdata.network) = %s
- AND
- masklen(_rirdata.network) = %s
- ),
- filtered AS (
- SELECT
- DISTINCT ON (c.network)
- c.network,
- c.country,
- c.original_countries,
- c.source,
- masklen(networks.network),
- networks.country AS parent_country
- FROM
- candidates c
- LEFT JOIN
- networks
- ON
- c.network << networks.network
- ORDER BY
- c.network,
- masklen(networks.network) DESC NULLS LAST
- )
- INSERT INTO
- networks(network, country, original_countries, source)
- SELECT
- network,
- country,
- original_countries,
- source
- FROM
- filtered
- WHERE
- parent_country IS NULL
- OR
- country <> parent_country
- ON CONFLICT DO NOTHING
- """, family, prefix,
- )
+ def _import_standard_format(self, source, countries, f, *args):
+ """
+ Imports a single standard format source feed
+ """
+ # Iterate over all blocks
+ for block in iterate_over_blocks(f):
+ self._parse_block(block, source, countries)
+
+ def _import_extended_format(self, source, countries, f, *args):
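+		"""
+		Imports a single extended format source feed
+		"""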
+ # Iterate over all lines
+ for line in iterate_over_lines(f):
+			self._parse_line(line, source, countries)
+
+ def _import_arin_as_names(self, source, countries, f, *args):
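+		"""
+		Imports ASN names from the ARIN registry service CSV feed
+		"""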
+ # Walk through the file
+		for line in csv.DictReader(iterate_over_lines(f), dialect="arin"):
+ log.debug("Processing object: %s" % line)
+
+ # Fetch status
+ status = line.get("Status")
+
+ # We are only interested in anything managed by ARIN
+			if status != "Full Registry Services":
+ continue
+
+ # Fetch organization name
+ name = line.get("Org Name")
+
+ # Extract ASNs
+ first_asn = line.get("Start AS Number")
+ last_asn = line.get("End AS Number")
+
+ # Cast to a number
+ try:
+ first_asn = int(first_asn)
+			except (TypeError, ValueError):
+ log.warning("Could not parse ASN '%s'" % first_asn)
+ continue
+
+ try:
+ last_asn = int(last_asn)
+			except (TypeError, ValueError):
+ log.warning("Could not parse ASN '%s'" % last_asn)
+ continue
+
+ # Check if the range is valid
+			if last_asn < first_asn:
+				log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
+				continue
+
+ # Insert everything into the database
+ for asn in range(first_asn, last_asn + 1):
+ if not self._check_parsed_asn(asn):
+ log.warning("Skipping invalid ASN %s" % asn)
+ continue
self.db.execute("""
INSERT INTO
name,
source
)
- SELECT
- _autnums.number,
- _organizations.name,
- _organizations.source
- FROM
- _autnums
- JOIN
- _organizations ON _autnums.organization = _organizations.handle
+ VALUES
+ (
+ %s, %s, %s
+ )
ON CONFLICT
(
number
)
- DO UPDATE
- SET name = excluded.name
- """,
+ DO NOTHING
+ """, asn, name, "ARIN",
)
- # Download and import (technical) AS names from ARIN
- with self.db.transaction():
- self._import_as_names_from_arin(downloader)
-
- # Return a non-zero exit code for errors
- return 1 if error else 0
-
def _check_parsed_network(self, network):
"""
Assistive function to detect and subsequently sort out parsed
(b) covering a too large chunk of the IP address space (prefix length
is < 7 for IPv4 networks, and < 10 for IPv6)
(c) "0.0.0.0" or "::" as a network address
- (d) are too small for being publicly announced (we have decided not to
- process them at the moment, as they significantly enlarge our
- database without providing very helpful additional information)
This unfortunately is necessary due to brain-dead clutter across
various RIR databases, causing mismatches and eventually disruptions.
We will return False in case a network is not suitable for adding
it to our database, and True otherwise.
"""
+ # Check input
+		if not isinstance(network, (ipaddress.IPv4Network, ipaddress.IPv6Network)):
+			raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
- if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
- return False
-
+ # Ignore anything that isn't globally routable
if not network.is_global:
log.debug("Skipping non-globally routable network: %s" % network)
return False
- if network.version == 4:
- if network.prefixlen < 7:
- log.debug("Skipping too big IP chunk: %s" % network)
- return False
-
- if network.prefixlen > 24:
- log.debug("Skipping network too small to be publicly announced: %s" % network)
- return False
-
- if str(network.network_address) == "0.0.0.0":
- log.debug("Skipping network based on 0.0.0.0: %s" % network)
- return False
+		# Ignore any unspecified networks (see RFC 5735 for IPv4 and RFC 2373 for IPv6)
+ elif network.is_unspecified:
+ log.debug("Skipping unspecified network: %s" % network)
+ return False
- elif network.version == 6:
+ # IPv6
+ if network.version == 6:
if network.prefixlen < 10:
log.debug("Skipping too big IP chunk: %s" % network)
return False
- if network.prefixlen > 48:
- log.debug("Skipping network too small to be publicly announced: %s" % network)
- return False
-
- if str(network.network_address) == "::":
- log.debug("Skipping network based on '::': %s" % network)
+ # IPv4
+ elif network.version == 4:
+ if network.prefixlen < 7:
+ log.debug("Skipping too big IP chunk: %s" % network)
return False
- else:
- # This should not happen...
- log.warning("Skipping network of unknown family, this should not happen: %s" % network)
- return False
-
# In case we have made it here, the network is considered to
# be suitable for libloc consumption...
return True
log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
return False
- def _parse_block(self, block, source_key, validcountries = None):
+ def _parse_block(self, block, source_key, countries):
# Get first line to find out what type of block this is
line = block[0]
# inetnum
if line.startswith("inet6num:") or line.startswith("inetnum:"):
- return self._parse_inetnum_block(block, source_key, validcountries)
+ return self._parse_inetnum_block(block, source_key, countries)
# organisation
elif line.startswith("organisation:"):
autnum.get("asn"), autnum.get("org"), source_key,
)
- def _parse_inetnum_block(self, block, source_key, validcountries = None):
- log.debug("Parsing inetnum block:")
-
+ def _parse_inetnum_block(self, block, source_key, countries):
inetnum = {}
for line in block:
- log.debug(line)
-
# Split line
key, val = split_line(line)
inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
- val = val.upper()
+ cc = val.upper()
- # Catch RIR data objects with more than one country code...
- if not key in inetnum:
- inetnum[key] = []
- else:
- if val in inetnum.get("country"):
- # ... but keep this list distinct...
- continue
+ # Ignore certain country codes
+ if cc in IGNORED_COUNTRIES:
+ log.debug("Ignoring country code '%s'" % cc)
+ continue
+
+ # Translate country codes
+ try:
+ cc = TRANSLATED_COUNTRIES[cc]
+ except KeyError:
+ pass
- # When people set country codes to "UK", they actually mean "GB"
- if val == "UK":
- val = "GB"
+ # Do we know this country?
+				if cc not in countries:
+ log.warning("Skipping invalid country code '%s'" % cc)
+ continue
- inetnum[key].append(val)
+ try:
+ inetnum[key].append(cc)
+ except KeyError:
+ inetnum[key] = [cc]
# Parse the geofeed attribute
elif key == "geofeed":
inetnum["geofeed"] = m.group(1)
# Skip empty objects
- if not inetnum or not "country" in inetnum:
+ if not inetnum:
return
- # Prepare skipping objects with unknown country codes...
- invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
-
# Iterate through all networks enumerated from above, check them for plausibility and insert
# them into the database, if _check_parsed_network() succeeded
for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
- if self._check_parsed_network(single_network):
- # Skip objects with unknown country codes if they are valid to avoid log spam...
- if validcountries and invalidcountries:
- log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
- (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
- break
+ if not self._check_parsed_network(single_network):
+ continue
+
+ # Fetch the countries or use a list with an empty country
+			ccs = inetnum.get("country", [None])
- # Everything is fine here, run INSERT statement...
- self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
- "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
+ # Insert the network into the database but only use the first country code
+			for cc in ccs:
+ self.db.execute("""
+ INSERT INTO
+ _rirdata
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+				""", "%s" % single_network, cc, [cc for cc in ccs if cc], source_key,
)
- # Update any geofeed information
- geofeed = inetnum.get("geofeed", None)
- if geofeed:
- self._parse_geofeed(geofeed, single_network)
+				# If there is more than one country, we only use the first one
+ break
- # Delete any previous geofeeds
- else:
- self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
- "%s" % single_network)
+ # Update any geofeed information
+ geofeed = inetnum.get("geofeed", None)
+ if geofeed:
+ self._parse_geofeed(source_key, geofeed, single_network)
- def _parse_geofeed(self, url, single_network):
+ def _parse_geofeed(self, source, url, single_network):
# Parse the URL
url = urllib.parse.urlparse(url)
# Store/update any geofeeds
self.db.execute("""
INSERT INTO
- network_geofeeds(
- network,
- url
- )
- VALUES(
- %s, %s
+ network_geofeeds
+ (
+ network,
+ url,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s
)
ON CONFLICT (network) DO
UPDATE SET url = excluded.url""",
- "%s" % single_network, url,
+ "%s" % single_network, url, source,
)
def _parse_org_block(self, block, source_key):
org.get("organisation"), org.get("org-name"), source_key,
)
- def _parse_line(self, line, source_key, validcountries = None):
+	def _parse_line(self, line, source_key, countries):
# Skip version line
if line.startswith("2"):
return
log.warning("Could not parse line: %s" % line)
return
+ # Skip any unknown protocols
+		if type not in ("ipv6", "ipv4"):
+ log.warning("Unknown IP protocol '%s'" % type)
+ return
+
# Skip any lines that are for stats only or do not have a country
# code at all (avoids log spam below)
if not country_code or country_code == '*':
(country_code, line))
return
- if type in ("ipv6", "ipv4"):
- return self._parse_ip_line(country_code, type, line, source_key)
-
- def _parse_ip_line(self, country, type, line, source_key):
try:
address, prefix, date, status, organization = line.split("|")
except ValueError:
if not self._check_parsed_network(network):
return
- self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
- UPDATE SET country = excluded.country",
- "%s" % network, country, [country], source_key,
+ self.db.execute("""
+ INSERT INTO
+ networks
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+		""", "%s" % network, country_code, [country_code], source_key,
)
- def _import_as_names_from_arin(self, downloader):
- # Delete all previously imported content
- self.db.execute("DELETE FROM autnums WHERE source = %s", "ARIN")
-
- # Try to retrieve the feed from ftp.arin.net
- feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
-
- # Walk through the file
- for line in csv.DictReader(feed, dialect="arin"):
- log.debug("Processing object: %s" % line)
-
- # Fetch status
- status = line.get("Status")
-
- # We are only interested in anything managed by ARIN
- if not status == "Full Registry Services":
- continue
-
- # Fetch organization name
- name = line.get("Org Name")
-
- # Extract ASNs
- first_asn = line.get("Start AS Number")
- last_asn = line.get("End AS Number")
-
- # Cast to a number
- try:
- first_asn = int(first_asn)
- except TypeError as e:
- log.warning("Could not parse ASN '%s'" % first_asn)
- continue
-
- try:
- last_asn = int(last_asn)
- except TypeError as e:
- log.warning("Could not parse ASN '%s'" % last_asn)
- continue
-
- # Check if the range is valid
- if last_asn < first_asn:
- log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
-
- # Insert everything into the database
- for asn in range(first_asn, last_asn + 1):
- if not self._check_parsed_asn(asn):
- log.warning("Skipping invalid ASN %s" % asn)
- continue
-
- self.db.execute("""
- INSERT INTO
- autnums
- (
- number,
- name,
- source
- )
- VALUES
- (
- %s, %s, %s
- )
- ON CONFLICT
- (
- number
- )
- DO NOTHING
- """, asn, name, "ARIN",
- )
-
def handle_update_announcements(self, ns):
server = ns.server[0]
id
""")
- with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
- results = executor.map(self._fetch_geofeed, geofeeds)
-
- # Fetch all results to raise any exceptions
- for result in results:
- pass
+ # Update all geofeeds
+ for geofeed in geofeeds:
+ with self.db.transaction():
+ self._fetch_geofeed(geofeed)
# Delete data from any feeds that did not update in the last two weeks
with self.db.transaction():
with self.db.transaction():
# Open the URL
try:
- req = urllib.request.Request(geofeed.url, headers={
+ # Send the request
+ f = self.downloader.retrieve(geofeed.url, headers={
"User-Agent" : "location/%s" % location.__version__,
# We expect some plain text file in CSV format
- "Accept" : "text/csv, text/plain",
+ "Accept" : "text/csv, text/plain",
})
- # XXX set proxy
+ # Remove any previous data
+ self.db.execute("DELETE FROM geofeed_networks \
+ WHERE geofeed_id = %s", geofeed.id)
- # Send the request
- with urllib.request.urlopen(req, timeout=10) as f:
- # Remove any previous data
- self.db.execute("DELETE FROM geofeed_networks \
- WHERE geofeed_id = %s", geofeed.id)
+ lineno = 0
- lineno = 0
+ # Read the output line by line
+ for line in f:
+ lineno += 1
- # Read the output line by line
- for line in f:
- lineno += 1
+ try:
+ line = line.decode()
- try:
- line = line.decode()
+ # Ignore any lines we cannot decode
+ except UnicodeDecodeError:
+ log.debug("Could not decode line %s in %s" \
+ % (lineno, geofeed.url))
+ continue
- # Ignore any lines we cannot decode
- except UnicodeDecodeError:
- log.debug("Could not decode line %s in %s" \
- % (lineno, geofeed.url))
- continue
+ # Strip any newline
+ line = line.rstrip()
- # Strip any newline
- line = line.rstrip()
+ # Skip empty lines
+ if not line:
+ continue
- # Skip empty lines
- if not line:
- continue
+ # Try to parse the line
+ try:
+ fields = line.split(",", 5)
+ except ValueError:
+ log.debug("Could not parse line: %s" % line)
+ continue
- # Try to parse the line
- try:
- fields = line.split(",", 5)
- except ValueError:
- log.debug("Could not parse line: %s" % line)
- continue
+ # Check if we have enough fields
+ if len(fields) < 4:
+ log.debug("Not enough fields in line: %s" % line)
+ continue
- # Check if we have enough fields
- if len(fields) < 4:
- log.debug("Not enough fields in line: %s" % line)
- continue
+ # Fetch all fields
+				network, country, region, city = fields[:4]
- # Fetch all fields
- network, country, region, city, = fields[:4]
+ # Try to parse the network
+ try:
+ network = ipaddress.ip_network(network, strict=False)
+ except ValueError:
+ log.debug("Could not parse network: %s" % network)
+ continue
- # Try to parse the network
- try:
- network = ipaddress.ip_network(network, strict=False)
- except ValueError:
- log.debug("Could not parse network: %s" % network)
- continue
-
- # Strip any excess whitespace from country codes
- country = country.strip()
-
- # Make the country code uppercase
- country = country.upper()
-
- # Check the country code
- if not country:
- log.debug("Empty country code in Geofeed %s line %s" \
- % (geofeed.url, lineno))
- continue
-
- elif not location.country_code_is_valid(country):
- log.debug("Invalid country code in Geofeed %s:%s: %s" \
- % (geofeed.url, lineno, country))
- continue
-
- # Write this into the database
- self.db.execute("""
- INSERT INTO
- geofeed_networks (
- geofeed_id,
- network,
- country,
- region,
- city
- )
- VALUES (%s, %s, %s, %s, %s)""",
- geofeed.id,
- "%s" % network,
- country,
- region,
- city,
- )
+ # Strip any excess whitespace from country codes
+ country = country.strip()
+
+ # Make the country code uppercase
+ country = country.upper()
+
+ # Check the country code
+ if not country:
+ log.debug("Empty country code in Geofeed %s line %s" \
+ % (geofeed.url, lineno))
+ continue
+
+ elif not location.country_code_is_valid(country):
+ log.debug("Invalid country code in Geofeed %s:%s: %s" \
+ % (geofeed.url, lineno, country))
+ continue
+
+ # Write this into the database
+ self.db.execute("""
+ INSERT INTO
+ geofeed_networks (
+ geofeed_id,
+ network,
+ country,
+ region,
+ city
+ )
+ VALUES (%s, %s, %s, %s, %s)""",
+ geofeed.id,
+ "%s" % network,
+ country,
+ region,
+ city,
+ )
# Catch any HTTP errors
except urllib.request.HTTPError as e:
def handle_update_overrides(self, ns):
with self.db.transaction():
- # Only drop manually created overrides, as we can be reasonably sure to have them,
- # and preserve the rest. If appropriate, it is deleted by correspondent functions.
- self.db.execute("""
- DELETE FROM autnum_overrides WHERE source = 'manual';
- DELETE FROM network_overrides WHERE source = 'manual';
- """)
-
- # Update overrides for various cloud providers big enough to publish their own IP
- # network allocation lists in a machine-readable format...
- self._update_overrides_for_aws()
-
- # Update overrides for Spamhaus DROP feeds...
- self._update_feed_for_spamhaus_drop()
+ # Drop any previous content
+ self.db.execute("TRUNCATE TABLE autnum_overrides")
+ self.db.execute("TRUNCATE TABLE network_overrides")
for file in ns.files:
log.info("Reading %s..." % file)
with open(file, "rb") as f:
- for type, block in location.importer.read_blocks(f):
+ for type, block in read_blocks(f):
if type == "net":
network = block.get("net")
# Try to parse and normalise the network
continue
self.db.execute("""
- INSERT INTO network_overrides(
+ INSERT INTO
+ network_overrides
+ (
network,
country,
- source,
is_anonymous_proxy,
is_satellite_provider,
is_anycast,
is_drop
- ) VALUES (%s, %s, %s, %s, %s, %s, %s)
- ON CONFLICT (network) DO NOTHING""",
+ )
+ VALUES
+ (
+ %s, %s, %s, %s, %s, %s
+ )
+ ON CONFLICT (network) DO NOTHING
+ """,
"%s" % network,
block.get("country"),
- "manual",
self._parse_bool(block, "is-anonymous-proxy"),
self._parse_bool(block, "is-satellite-provider"),
self._parse_bool(block, "is-anycast"),
autnum = autnum[2:]
self.db.execute("""
- INSERT INTO autnum_overrides(
+ INSERT INTO
+ autnum_overrides
+ (
number,
name,
country,
- source,
is_anonymous_proxy,
is_satellite_provider,
is_anycast,
is_drop
- ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
- ON CONFLICT DO NOTHING""",
+ )
+ VALUES
+ (
+ %s, %s, %s, %s, %s, %s, %s
+ )
+ ON CONFLICT (number) DO NOTHING
+ """,
autnum,
block.get("name"),
block.get("country"),
- "manual",
self._parse_bool(block, "is-anonymous-proxy"),
self._parse_bool(block, "is-satellite-provider"),
self._parse_bool(block, "is-anycast"),
else:
log.warning("Unsupported type: %s" % type)
- def _update_overrides_for_aws(self):
- # Download Amazon AWS IP allocation file to create overrides...
- downloader = location.importer.Downloader()
+ def handle_update_feeds(self, ns):
+ """
+ Update any third-party feeds
+ """
+ success = True
- try:
- # Fetch IP ranges
- f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
+ feeds = (
+ # AWS IP Ranges
+ ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
- # Parse downloaded file
- aws_ip_dump = json.load(f)
- except Exception as e:
- log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
- return
+ # Spamhaus DROP
+ ("SPAMHAUS-DROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
+ ("SPAMHAUS-EDROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
+ ("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),
- # At this point, we can assume the downloaded file to be valid
- self.db.execute("""
- DELETE FROM network_feeds WHERE source = 'Amazon AWS IP feed'
- """)
+ # Spamhaus ASNDROP
+ ("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
+ )
+
+ # Drop any data from feeds that we don't support (any more)
+ with self.db.transaction():
+ # Fetch the names of all feeds we support
+ sources = [name for name, *rest in feeds]
+
+ self.db.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources)
+ self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)
+
+ # Walk through all feeds
+ for name, callback, url, *args in feeds:
+ # Skip any feeds that were not requested on the command line
+			if ns.feeds and name not in ns.feeds:
+ continue
+
+ try:
+ self._process_feed(name, callback, url, *args)
+
+ # Log an error but continue if an exception occurs
+ except Exception as e:
+ log.error("Error processing feed '%s': %s" % (name, e))
+ success = False
+
+ # Return status
+ return 0 if success else 1
+
+ def _process_feed(self, name, callback, url, *args):
+ """
+ Processes one feed
+ """
+ # Open the URL
+ f = self.downloader.retrieve(url)
+
+ with self.db.transaction():
+ # Drop any previous content
+ self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
+ self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
+
+ # Call the callback to process the feed
+ return callback(name, f, *args)
- # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
+ def _import_aws_ip_ranges(self, name, f):
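+		"""
+		Imports the Amazon AWS IP ranges feed
+		"""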
+ # Parse the feed
+ feed = json.load(f)
+
+ # Set up a dictionary for mapping a region name to a country. Unfortunately,
# there seems to be no machine-readable version available of this other than
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
# was helpful here as well.
aws_region_country_map = {
- "af-south-1": "ZA",
- "ap-east-1": "HK",
- "ap-south-1": "IN",
- "ap-south-2": "IN",
- "ap-northeast-3": "JP",
- "ap-northeast-2": "KR",
- "ap-southeast-1": "SG",
- "ap-southeast-2": "AU",
- "ap-southeast-3": "MY",
- "ap-southeast-4": "AU",
- "ap-southeast-5": "NZ", # Auckland, NZ
- "ap-southeast-6": "AP", # XXX: Precise location not documented anywhere
- "ap-northeast-1": "JP",
- "ca-central-1": "CA",
- "ca-west-1": "CA",
- "eu-central-1": "DE",
- "eu-central-2": "CH",
- "eu-west-1": "IE",
- "eu-west-2": "GB",
- "eu-south-1": "IT",
- "eu-south-2": "ES",
- "eu-west-3": "FR",
- "eu-north-1": "SE",
- "eusc-de-east-1" : "DE", # XXX: Undocumented, likely located in Berlin rather than Frankfurt
- "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
- "me-central-1": "AE",
- "me-south-1": "BH",
- "sa-east-1": "BR"
- }
+ # Africa
+ "af-south-1" : "ZA",
+
+ # Asia
+ "il-central-1" : "IL", # Tel Aviv
+
+ # Asia/Pacific
+ "ap-northeast-1" : "JP",
+ "ap-northeast-2" : "KR",
+ "ap-northeast-3" : "JP",
+ "ap-east-1" : "HK",
+ "ap-south-1" : "IN",
+ "ap-south-2" : "IN",
+ "ap-southeast-1" : "SG",
+ "ap-southeast-2" : "AU",
+ "ap-southeast-3" : "MY",
+ "ap-southeast-4" : "AU",
+ "ap-southeast-5" : "NZ", # Auckland, NZ
+ "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
+
+ # Canada
+ "ca-central-1" : "CA",
+ "ca-west-1" : "CA",
+
+ # Europe
+ "eu-central-1" : "DE",
+ "eu-central-2" : "CH",
+ "eu-north-1" : "SE",
+ "eu-west-1" : "IE",
+ "eu-west-2" : "GB",
+ "eu-west-3" : "FR",
+ "eu-south-1" : "IT",
+ "eu-south-2" : "ES",
+
+ # Middle East
+ "me-central-1" : "AE",
+ "me-south-1" : "BH",
+
+ # South America
+ "sa-east-1" : "BR",
+
+ # Undocumented, likely located in Berlin rather than Frankfurt
+ "eusc-de-east-1" : "DE",
+ }
+
+ # Collect a list of all networks
+ prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])
+
+ for prefix in prefixes:
+ # Fetch network
+ network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")
+
+ # Parse the network
+ try:
+ network = ipaddress.ip_network(network)
+			except ValueError as e:
+				log.warning("%s: Unable to parse prefix %s: %s" % (name, network, e))
+ continue
- # Fetch all valid country codes to check parsed networks aganist...
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
- validcountries = []
+ # Sanitize parsed networks...
+ if not self._check_parsed_network(network):
+ continue
- for row in rows:
- validcountries.append(row.country_code)
+ # Fetch the region
+ region = prefix.get("region")
- with self.db.transaction():
- for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
- try:
- network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
- except ValueError:
- log.warning("Unable to parse line: %s" % snetwork)
- continue
+ # Set some defaults
+ cc = None
+ is_anycast = False
- # Sanitize parsed networks...
- if not self._check_parsed_network(network):
- continue
+ # Fetch the CC from the dictionary
+ try:
+ cc = aws_region_country_map[region]
- # Determine region of this network...
- region = snetwork["region"]
- cc = None
- is_anycast = False
+ # If we couldn't find anything, let's try something else...
+ except KeyError as e:
+ # Find anycast networks
+ if region == "GLOBAL":
+ is_anycast = True
- # Any region name starting with "us-" will get "US" country code assigned straight away...
- if region.startswith("us-"):
+ # Everything that starts with us- is probably in the United States
+ elif region.startswith("us-"):
cc = "US"
+
+ # Everything that starts with cn- is probably China
elif region.startswith("cn-"):
- # ... same goes for China ...
cc = "CN"
- elif region == "GLOBAL":
- # ... funny region name for anycast-like networks ...
- is_anycast = True
- elif region in aws_region_country_map:
- # ... assign looked up country code otherwise ...
- cc = aws_region_country_map[region]
+
+ # Log a warning for anything else
else:
- # ... and bail out if we are missing something here
- log.warning("Unable to determine country code for line: %s" % snetwork)
+ log.warning("%s: Could not determine country code for AWS region %s" \
+ % (name, region))
continue
- # Skip networks with unknown country codes
- if not is_anycast and validcountries and cc not in validcountries:
- log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
- (cc, network))
- return
-
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO
- network_feeds
- (
- network,
- source,
- country,
- is_anycast
- )
- VALUES
- (
- %s, %s, %s, %s
- )
- ON CONFLICT (network, source) DO NOTHING
- """, "%s" % network, "Amazon AWS IP feed", cc, is_anycast,
+ # Write to database
+ self.db.execute("""
+ INSERT INTO
+ network_feeds
+ (
+ network,
+ source,
+ country,
+ is_anycast
)
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network, source) DO NOTHING
+ """, "%s" % network, name, cc, is_anycast,
+ )
- def _update_feed_for_spamhaus_drop(self):
- downloader = location.importer.Downloader()
-
- ip_lists = [
- ("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
- ("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
- ("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
- ]
-
- asn_lists = [
- ("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json")
- ]
-
- for name, url in ip_lists:
- # Fetch IP list from given URL
- f = downloader.retrieve(url)
+ def _import_spamhaus_drop(self, name, f):
+ """
+ Import Spamhaus DROP IP feeds
+ """
+ # Count all lines
+ lines = 0
- # Split into lines
- fcontent = f.readlines()
+ # Walk through all lines
+ for line in f:
+ # Decode line
+ line = line.decode("utf-8")
- with self.db.transaction():
- # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
- # downloads.
- if len(fcontent) > 10:
- self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
- else:
- log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
- continue
+ # Strip off any comments
+			line, delim, comment = line.partition(";")
- # Iterate through every line, filter comments and add remaining networks to
- # the override table in case they are valid...
- for sline in fcontent:
- # The response is assumed to be encoded in UTF-8...
- sline = sline.decode("utf-8")
+			# Strip any excess whitespace
+			line = line.strip()
- # Comments start with a semicolon...
- if sline.startswith(";"):
- continue
+			# Ignore empty lines
+			if not line:
+				continue
- # Extract network and ignore anything afterwards...
- try:
- network = ipaddress.ip_network(sline.split()[0], strict=False)
- except ValueError:
- log.error("Unable to parse line: %s" % sline)
- continue
+ # Increment line counter
+ lines += 1
- # Sanitize parsed networks...
- if not self._check_parsed_network(network):
- log.warning("Skipping bogus network found in %s (%s): %s" % \
- (name, url, network))
- continue
+ # Parse the network
+ try:
+ network = ipaddress.ip_network(line)
+ except ValueError as e:
+ log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
+ continue
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO
- network_feeds
- (
- network,
- source,
- is_drop
- )
- VALUES
- (
- %s, %s, %s
- )""", "%s" % network, name, True,
- )
+ # Check network
+ if not self._check_parsed_network(network):
+ log.warning("%s: Skipping bogus network: %s" % (name, network))
+ continue
- for name, url in asn_lists:
- # Fetch URL
- f = downloader.retrieve(url)
+ # Insert into the database
+ self.db.execute("""
+ INSERT INTO
+ network_feeds
+ (
+ network,
+ source,
+ is_drop
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % network, name, True,
+ )
- # Split into lines
- fcontent = f.readlines()
+ # Raise an exception if we could not import anything
+ if not lines:
+ raise RuntimeError("Received bogus feed %s with no data" % name)
- with self.db.transaction():
- # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
- # downloads.
- if len(fcontent) > 10:
- self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
- else:
- log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
- continue
+ def _import_spamhaus_asndrop(self, name, f):
+ """
+ Import Spamhaus ASNDROP feed
+ """
+ for line in f:
+ # Decode the line
+ line = line.decode("utf-8")
- # Iterate through every line, filter comments and add remaining ASNs to
- # the override table in case they are valid...
- for sline in fcontent:
- # The response is assumed to be encoded in UTF-8...
- sline = sline.decode("utf-8")
+ # Parse JSON
+ try:
+ line = json.loads(line)
+ except json.JSONDecodeError as e:
+ log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
+ continue
- # Load every line as a JSON object and try to obtain an ASN from it...
- try:
- lineobj = json.loads(sline)
- except json.decoder.JSONDecodeError:
- log.error("Unable to parse line as a JSON object: %s" % sline)
- continue
+ # Fetch type
+ type = line.get("type")
- # Skip line contiaining file metadata
- try:
- type = lineobj["type"]
+ # Skip any metadata
+ if type == "metadata":
+ continue
- if type == "metadata":
- continue
- except KeyError:
- pass
+ # Fetch ASN
+ asn = line.get("asn")
- try:
- asn = lineobj["asn"]
- as_name = lineobj["asname"]
- except KeyError:
- log.warning("Unable to extract necessary information from line: %s" % sline)
- continue
+ # Skip any lines without an ASN
+ if not asn:
+ continue
- # Filter invalid ASNs...
- if not self._check_parsed_asn(asn):
- log.warning("Skipping bogus ASN found in %s (%s): %s" % \
- (name, url, asn))
- continue
+ # Filter invalid ASNs
+ if not self._check_parsed_asn(asn):
+ log.warning("%s: Skipping bogus ASN %s" % (name, asn))
+ continue
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO
- autnum_feeds
- (
- number,
- source,
- is_drop
- )
- VALUES
- (
- %s, %s, %s
- )""", "%s" % asn, name, True,
- )
+ # Write to database
+ self.db.execute("""
+ INSERT INTO
+ autnum_feeds
+ (
+ number,
+ source,
+ is_drop
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % asn, name, True,
+ )
@staticmethod
def _parse_bool(block, key):
# Default to None
return None
- @property
- def countries(self):
- # Fetch all valid country codes to check parsed networks aganist
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
-
- # Return all countries
- return [row.country_code for row in rows]
-
def handle_import_countries(self, ns):
with self.db.transaction():
# Drop all data that we have
return key, val
+def read_blocks(f):
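+	"""
+	Reads blocks from a file and yields them as (type, data) tuples,
+	where the key of the first line determines the type and data maps
+	each attribute key to its value
+	"""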
+ for block in iterate_over_blocks(f):
+ type = None
+ data = {}
+
+ for i, line in enumerate(block):
+ key, value = line.split(":", 1)
+
+ # The key of the first line defines the type
+ if i == 0:
+ type = key
+
+ # Store value
+ data[key] = value.strip()
+
+ yield type, data
+
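+# A rough sketch of the input these helpers expect (assuming an RPSL-style
+# WHOIS dump): objects are separated by blank lines, for example:
+#
+#   inetnum:  192.0.2.0 - 192.0.2.255
+#   country:  DE
+#
+# iterate_over_blocks() yields each object as a list of its lines, and
+# read_blocks() turns it into ("inetnum", {"inetnum": "...", "country": "DE"}).
+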
+def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
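+	"""
+	Reads all blocks from a file and yields them one by one, where a
+	block is a list of consecutive non-empty lines with comments removed
+	"""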
+ block = []
+
+ for line in f:
+ # Skip commented lines
+ if line.startswith(b"#") or line.startswith(b"%"):
+ continue
+
+ # Convert to string
+ for charset in charsets:
+ try:
+ line = line.decode(charset)
+ except UnicodeDecodeError:
+ continue
+ else:
+ break
+
+ # Remove any comments at the end of line
+		line, delim, comment = line.partition("#")
+
+ # Strip any whitespace at the end of the line
+ line = line.rstrip()
+
+ # If we cut off some comment and the line is empty, we can skip it
+ if comment and not line:
+ continue
+
+ # If the line has some content, keep collecting it
+ if line:
+ block.append(line)
+ continue
+
+ # End the block on an empty line
+ if block:
+ yield block
+
+ # Reset the block
+ block = []
+
+ # Return the last block
+ if block:
+ yield block
+
+def iterate_over_lines(f):
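+	"""
+	Iterates over a file line by line, decoding each line and
+	stripping any trailing whitespace
+	"""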
+ for line in f:
+ # Decode the line
+ line = line.decode()
+
+ # Strip the ending
+ yield line.rstrip()
+
def main():
# Run the command line interface
c = CLI()