git.ipfire.org Git - location/libloc.git/commitdiff
location-importer.in: import additional IP information for Amazon AWS IP networks
author: Peter Müller <peter.mueller@ipfire.org>
        Tue, 8 Jun 2021 09:55:41 +0000 (09:55 +0000)
committer: Michael Tremer <michael.tremer@ipfire.org>
        Thu, 10 Jun 2021 08:54:34 +0000 (08:54 +0000)
Amazon publishes information regarding some of their IP networks
primarily used for AWS cloud services in a machine-readable format. To
improve libloc lookup results for these, we have little choice other
than importing and parsing them.

Unfortunately, there seems to be no machine-readable list of the
locations of their data centers or availability zones available. If
there _is_ any, please let the author know.

The second version of this patch adds a meaningful description for the
"source" column in the overrides tables, to make the introduced changes
more transparent.

Fixes: #12594
Signed-off-by: Peter Müller <peter.mueller@ipfire.org>
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/python/location-importer.in

index 10e9c4ac9e77f57d7ea15574508f5cb32f3b763f..b235db7b19c949e262bfbeff30b2e233fd7a91f0 100644 (file)
@@ -19,6 +19,7 @@
 
 import argparse
 import ipaddress
+import json
 import logging
 import math
 import re
@@ -1031,6 +1032,10 @@ class CLI(object):
                                TRUNCATE TABLE network_overrides;
                        """)
 
+                       # Update overrides for various cloud providers big enough to publish their own IP
+                       # network allocation lists in a machine-readable format...
+                       self._update_overrides_for_aws()
+
                        for file in ns.files:
                                log.info("Reading %s..." % file)
 
@@ -1106,6 +1111,115 @@ class CLI(object):
                                                else:
                                                        log.warning("Unsupported type: %s" % type)
 
+       def _update_overrides_for_aws(self):
+               """
+               Imports Amazon's published AWS IP ranges as network overrides, mapping each
+               AWS region to a country. Unmappable networks are skipped with a warning.
+               """
+               # Download Amazon AWS IP allocation file to create overrides...
+               downloader = location.importer.Downloader()
+
+               try:
+                       with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
+                               aws_ip_dump = json.load(f.body)
+               except Exception as e:
+                       log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
+                       return
+
+               # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
+               # there seems to be no machine-readable version available of this other than
+               # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
+               # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
+               # was helpful here as well.
+               aws_region_country_map = {
+                               "af-south-1": "ZA",
+                               "ap-east-1": "HK",
+                               "ap-south-1": "IN",
+                               "ap-south-2": "IN",
+                               "ap-northeast-3": "JP",
+                               "ap-northeast-2": "KR",
+                               "ap-southeast-1": "SG",
+                               "ap-southeast-2": "AU",
+                               "ap-southeast-3": "ID",
+                               "ap-southeast-4": "AU",
+                               "ap-northeast-1": "JP",
+                               "ca-central-1": "CA",
+                               "eu-central-1": "DE",
+                               "eu-central-2": "CH",
+                               "eu-west-1": "IE",
+                               "eu-west-2": "GB",
+                               "eu-south-1": "IT",
+                               "eu-south-2": "ES",
+                               "eu-west-3": "FR",
+                               "eu-north-1": "SE",
+                               "me-central-1": "AE",
+                               "me-south-1": "BH",
+                               "sa-east-1": "BR"
+                               }
+
+               # Fetch all valid country codes (as a set, for cheap per-network membership checks)...
+               rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
+               validcountries = {row.country_code for row in rows}
+
+               with self.db.transaction():
+                       for snetwork in aws_ip_dump.get("prefixes", []) + aws_ip_dump.get("ipv6_prefixes", []):
+                               try:
+                                       network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
+                               except ValueError:
+                                       log.warning("Unable to parse line: %s" % snetwork)
+                                       continue
+
+                               # Sanitize parsed networks...
+                               if not self._check_parsed_network(network):
+                                       continue
+
+                               # Determine region of this network (a missing one ends up in the "else" branch below)...
+                               region = snetwork.get("region") or ""
+                               cc = None
+                               is_anycast = False
+
+                               # Any region name starting with "us-" will get "US" country code assigned straight away...
+                               if region.startswith("us-"):
+                                       cc = "US"
+                               elif region.startswith("cn-"):
+                                       # ... same goes for China ...
+                                       cc = "CN"
+                               elif region == "GLOBAL":
+                                       # ... funny region name for anycast-like networks ...
+                                       is_anycast = True
+                               elif region in aws_region_country_map:
+                                       # ... assign looked up country code otherwise ...
+                                       cc = aws_region_country_map[region]
+                               else:
+                                       # ... and bail out if we are missing something here
+                                       log.warning("Unable to determine country code for line: %s" % snetwork)
+                                       continue
+
+                               # Skip networks with unknown country codes, but keep processing the remaining ones
+                               if not is_anycast and validcountries and cc not in validcountries:
+                                       log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % (cc, network))
+                                       continue
+
+                               # Conduct SQL statement...
+                               self.db.execute("""
+                                       INSERT INTO network_overrides(
+                                               network,
+                                               country,
+                                               source,
+                                               is_anonymous_proxy,
+                                               is_satellite_provider,
+                                               is_anycast
+                                       ) VALUES (%s, %s, %s, %s, %s, %s)
+                                       ON CONFLICT (network) DO NOTHING""",
+                                       "%s" % network,
+                                       cc,
+                                       "Amazon AWS IP feed",
+                                       None,
+                                       None,
+                                       is_anycast,
+                               )
+
+
        @staticmethod
        def _parse_bool(block, key):
                val = block.get(key)