###############################################################################
import argparse
-import concurrent.futures
import csv
+import functools
import http.client
import ipaddress
import json
# Load our location module
import location
import location.database
-import location.importer
+from location.downloader import Downloader
from location.i18n import _
# Initialise logging
(131072, 4199999999),
)
+TRANSLATED_COUNTRIES = {
+ # When people say UK, they mean GB
+ "UK" : "GB",
+}
+
+IGNORED_COUNTRIES = set((
+ # Formerly Yugoslavia
+ "YU",
+
+ # Some people use ZZ to say "no country" or to hide the country
+ "ZZ",
+))
+
# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
# Update WHOIS
update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
+ update_whois.add_argument("sources", nargs="*",
+ help=_("Only update these sources"))
update_whois.set_defaults(func=self.handle_update_whois)
# Update announcements
help=_("Update Geofeeds"))
update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
+ # Update feeds
+ update_feeds = subparsers.add_parser("update-feeds",
+ help=_("Update Feeds"))
+ update_feeds.add_argument("feeds", nargs="*",
+ help=_("Only update these feeds"))
+ update_feeds.set_defaults(func=self.handle_update_feeds)
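+		# e.g. "update-feeds SPAMHAUS-DROP" updates only the Spamhaus DROP feed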
+
# Update overrides
update_overrides = subparsers.add_parser("update-overrides",
help=_("Update overrides"),
# Parse command line arguments
args = self.parse_cli()
+ # Initialize the downloader
+ self.downloader = Downloader()
+
# Initialise database
self.db = self._setup_database(args)
CREATE INDEX IF NOT EXISTS geofeed_networks_search
ON geofeed_networks USING GIST(network inet_ops);
CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
+ ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL;
CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
ON network_geofeeds(network);
CREATE INDEX IF NOT EXISTS network_geofeeds_search
);
CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
ON autnum_overrides(number);
- ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+ ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;
CREATE TABLE IF NOT EXISTS network_overrides(
network inet NOT NULL,
ON network_overrides(network);
CREATE INDEX IF NOT EXISTS network_overrides_search
ON network_overrides USING GIST(network inet_ops);
- ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+ ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;
""")
return db
+ def fetch_countries(self):
+ """
+		Returns a set of all country codes known to the database
+		"""
+		# Fetch all valid country codes to check parsed networks against...
+ countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
+
+		return set(country.country_code for country in countries)
+
def handle_write(self, ns):
"""
Compiles a database in libloc format out of what is in the database
writer.write(file)
def handle_update_whois(self, ns):
- downloader = location.importer.Downloader()
-
# Did we run successfully?
- error = False
+ success = True
+
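+		# All WHOIS sources that we import: each registry maps to a list of
+		# (callback, URL) pairs which are downloaded and parsed in order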
+ sources = (
+ # African Network Information Centre
+ ("AFRINIC", (
+ (self._import_standard_format, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
+ )),
+
+ # Asia Pacific Network Information Centre
+ ("APNIC", (
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
+ (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
+ )),
+
+ # American Registry for Internet Numbers
+ ("ARIN", (
+ (self._import_extended_format, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
+ (self._import_arin_as_names, "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
+ )),
+
+ # Japan Network Information Center
+ ("JPNIC", (
+ (self._import_standard_format, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
+ )),
+
+ # Latin America and Caribbean Network Information Centre
+ ("LACNIC", (
+ (self._import_standard_format, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
+ (self._import_extended_format, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
+ )),
+
+ # Réseaux IP Européens
+ ("RIPE", (
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
+ (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
+ )),
+ )
# Fetch all valid country codes to check parsed networks against
- validcountries = self.countries
+ countries = self.fetch_countries()
+
+ # Check if we have countries
+ if not countries:
+ log.error("Please import countries before importing any WHOIS data")
+ return 1
# Iterate over all potential sources
- for source in sorted(location.importer.SOURCES):
- with self.db.transaction():
- # Create some temporary tables to store parsed data
- self.db.execute("""
- CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
- organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
- CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
-
- CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
- name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
- CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
-
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
- original_countries text[] NOT NULL, source text NOT NULL)
- ON COMMIT DROP;
- CREATE INDEX _rirdata_search ON _rirdata
- USING BTREE(family(network), masklen(network));
- CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
- """)
-
- # Remove all previously imported content
- self.db.execute("DELETE FROM autnums WHERE source = %s", source)
- self.db.execute("DELETE FROM networks WHERE source = %s", source)
+ for name, feeds in sources:
+ # Skip anything that should not be updated
+			if ns.sources and name not in ns.sources:
+ continue
- try:
- # Fetch WHOIS sources
- for url in location.importer.WHOIS_SOURCES.get(source, []):
- for block in downloader.request_blocks(url):
- self._parse_block(block, source, validcountries)
-
- # Fetch extended sources
- for url in location.importer.EXTENDED_SOURCES.get(source, []):
- for line in downloader.request_lines(url):
- self._parse_line(line, source, validcountries)
- except urllib.error.URLError as e:
- log.error("Could not retrieve data from %s: %s" % (source, e))
- error = True
-
- # Continue with the next source
- continue
+ try:
+ self._process_source(name, feeds, countries)
+
+ # Log an error but continue if an exception occurs
+ except Exception as e:
+ log.error("Error processing source %s" % name, exc_info=True)
+ success = False
+
+ # Return a non-zero exit code for errors
+ return 0 if success else 1
+
+ def _process_source(self, source, feeds, countries):
+ """
+ This function processes one source
+ """
+ # Wrap everything into one large transaction
+ with self.db.transaction():
+ # Remove all previously imported content
+ self.db.execute("DELETE FROM autnums WHERE source = %s", source)
+ self.db.execute("DELETE FROM networks WHERE source = %s", source)
+ self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", source)
+
+ # Create some temporary tables to store parsed data
+ self.db.execute("""
+ CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
+ organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+ CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
+
+ CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
+ name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+ CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
+
+ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
+ original_countries text[] NOT NULL, source text NOT NULL)
+ ON COMMIT DROP;
+ CREATE INDEX _rirdata_search ON _rirdata
+ USING BTREE(family(network), masklen(network));
+ CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
+ """)
- # Process all parsed networks from every RIR we happen to have access to,
- # insert the largest network chunks into the networks table immediately...
- families = self.db.query("""
- SELECT DISTINCT
- family(network) AS family
+ # Parse all feeds
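+			# Each feed is a (callback, URL, ...) tuple; the callback receives
+			# the source name, the valid country codes and the downloaded file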
+ for callback, url, *args in feeds:
+ # Retrieve the feed
+ f = self.downloader.retrieve(url)
+
+ # Call the callback
+ callback(source, countries, f, *args)
+
+ # Process all parsed networks from every RIR we happen to have access to,
+ # insert the largest network chunks into the networks table immediately...
+ families = self.db.query("""
+ SELECT DISTINCT
+ family(network) AS family
+ FROM
+ _rirdata
+ ORDER BY
+ family(network)
+ """,
+ )
+
+ for family in (row.family for row in families):
+ # Fetch the smallest mask length in our data set
+ smallest = self.db.get("""
+ SELECT
+ MIN(
+ masklen(network)
+ ) AS prefix
FROM
_rirdata
- ORDER BY
- family(network)
- """,
+ WHERE
+ family(network) = %s
+ """, family,
)
- for family in (row.family for row in families):
- # Fetch the smallest mask length in our data set
- smallest = self.db.get("""
- SELECT
- MIN(
- masklen(network)
- ) AS prefix
- FROM
- _rirdata
- WHERE
- family(network) = %s
- """, family,
+ # Copy all networks
+ self.db.execute("""
+ INSERT INTO
+ networks
+ (
+ network,
+ country,
+ original_countries,
+ source
)
+ SELECT
+ network,
+ country,
+ original_countries,
+ source
+ FROM
+ _rirdata
+ WHERE
+ masklen(network) = %s
+ AND
+ family(network) = %s
+ ON CONFLICT DO
+ NOTHING""",
+ smallest.prefix,
+ family,
+ )
- # Copy all networks
+ # ... determine any other prefixes for this network family, ...
+ prefixes = self.db.query("""
+ SELECT
+ DISTINCT masklen(network) AS prefix
+ FROM
+ _rirdata
+ WHERE
+ family(network) = %s
+ ORDER BY
+ masklen(network) ASC
+ OFFSET 1
+ """, family,
+ )
+
+ # ... and insert networks with this prefix in case they provide additional
+ # information (i. e. subnet of a larger chunk with a different country)
+ for prefix in (row.prefix for row in prefixes):
self.db.execute("""
- INSERT INTO
- networks
- (
- network,
- country,
- original_countries,
- source
+ WITH candidates AS (
+ SELECT
+ _rirdata.network,
+ _rirdata.country,
+ _rirdata.original_countries,
+ _rirdata.source
+ FROM
+ _rirdata
+ WHERE
+ family(_rirdata.network) = %s
+ AND
+ masklen(_rirdata.network) = %s
+ ),
+ filtered AS (
+ SELECT
+ DISTINCT ON (c.network)
+ c.network,
+ c.country,
+ c.original_countries,
+ c.source,
+ masklen(networks.network),
+ networks.country AS parent_country
+ FROM
+ candidates c
+ LEFT JOIN
+ networks
+ ON
+ c.network << networks.network
+ ORDER BY
+ c.network,
+ masklen(networks.network) DESC NULLS LAST
)
+ INSERT INTO
+ networks(network, country, original_countries, source)
SELECT
network,
country,
original_countries,
source
FROM
- _rirdata
+ filtered
WHERE
- masklen(network) = %s
- AND
- family(network) = %s
- ON CONFLICT DO
- NOTHING""",
- smallest.prefix,
- family,
+ parent_country IS NULL
+ OR
+ country <> parent_country
+ ON CONFLICT DO NOTHING
+ """, family, prefix,
)
- # ... determine any other prefixes for this network family, ...
- prefixes = self.db.query("""
- SELECT
- DISTINCT masklen(network) AS prefix
- FROM
- _rirdata
- WHERE
- family(network) = %s
- ORDER BY
- masklen(network) ASC
- OFFSET 1
- """, family,
- )
+ self.db.execute("""
+ INSERT INTO
+ autnums
+ (
+ number,
+ name,
+ source
+ )
+ SELECT
+ _autnums.number,
+ _organizations.name,
+ _organizations.source
+ FROM
+ _autnums
+ JOIN
+ _organizations ON _autnums.organization = _organizations.handle
+ ON CONFLICT
+ (
+ number
+ )
+ DO UPDATE
+ SET name = excluded.name
+ """,
+ )
- # ... and insert networks with this prefix in case they provide additional
- # information (i. e. subnet of a larger chunk with a different country)
- for prefix in (row.prefix for row in prefixes):
- self.db.execute("""
- WITH candidates AS (
- SELECT
- _rirdata.network,
- _rirdata.country,
- _rirdata.original_countries,
- _rirdata.source
- FROM
- _rirdata
- WHERE
- family(_rirdata.network) = %s
- AND
- masklen(_rirdata.network) = %s
- ),
- filtered AS (
- SELECT
- DISTINCT ON (c.network)
- c.network,
- c.country,
- c.original_countries,
- c.source,
- masklen(networks.network),
- networks.country AS parent_country
- FROM
- candidates c
- LEFT JOIN
- networks
- ON
- c.network << networks.network
- ORDER BY
- c.network,
- masklen(networks.network) DESC NULLS LAST
- )
- INSERT INTO
- networks(network, country, original_countries, source)
- SELECT
- network,
- country,
- original_countries,
- source
- FROM
- filtered
- WHERE
- parent_country IS NULL
- OR
- country <> parent_country
- ON CONFLICT DO NOTHING
- """, family, prefix,
- )
+ def _import_standard_format(self, source, countries, f, *args):
+ """
+ Imports a single standard format source feed
+ """
+ # Iterate over all blocks
+ for block in iterate_over_blocks(f):
+ self._parse_block(block, source, countries)
+
+ def _import_extended_format(self, source, countries, f, *args):
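+		"""
+		Imports a single extended format source feed
+		"""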
+ # Iterate over all lines
+ for line in iterate_over_lines(f):
+			self._parse_line(line, source, countries)
+
+ def _import_arin_as_names(self, source, countries, f, *args):
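+		"""
+		Imports ASN names from the ARIN registry service CSV feed
+		"""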
+ # Walk through the file
+		for line in csv.DictReader(iterate_over_lines(f), dialect="arin"):
+ log.debug("Processing object: %s" % line)
+
+ # Fetch status
+ status = line.get("Status")
+
+ # We are only interested in anything managed by ARIN
+			if status != "Full Registry Services":
+ continue
+
+ # Fetch organization name
+ name = line.get("Org Name")
+
+ # Extract ASNs
+ first_asn = line.get("Start AS Number")
+ last_asn = line.get("End AS Number")
+
+ # Cast to a number
+ try:
+ first_asn = int(first_asn)
+			except (TypeError, ValueError):
+ log.warning("Could not parse ASN '%s'" % first_asn)
+ continue
+
+ try:
+ last_asn = int(last_asn)
+			except (TypeError, ValueError):
+ log.warning("Could not parse ASN '%s'" % last_asn)
+ continue
+
+ # Check if the range is valid
+			if last_asn < first_asn:
+				log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
+				continue
+
+ # Insert everything into the database
+ for asn in range(first_asn, last_asn + 1):
+ if not self._check_parsed_asn(asn):
+ log.warning("Skipping invalid ASN %s" % asn)
+ continue
self.db.execute("""
INSERT INTO
name,
source
)
- SELECT
- _autnums.number,
- _organizations.name,
- _organizations.source
- FROM
- _autnums
- JOIN
- _organizations ON _autnums.organization = _organizations.handle
+ VALUES
+ (
+ %s, %s, %s
+ )
ON CONFLICT
(
number
)
- DO UPDATE
- SET name = excluded.name
- """,
+ DO NOTHING
+ """, asn, name, "ARIN",
)
- # Download and import (technical) AS names from ARIN
- with self.db.transaction():
- self._import_as_names_from_arin(downloader)
-
- # Return a non-zero exit code for errors
- return 1 if error else 0
-
def _check_parsed_network(self, network):
"""
Assistive function to detect and subsequently sort out parsed
(b) covering a too large chunk of the IP address space (prefix length
is < 7 for IPv4 networks, and < 10 for IPv6)
(c) "0.0.0.0" or "::" as a network address
- (d) are too small for being publicly announced (we have decided not to
- process them at the moment, as they significantly enlarge our
- database without providing very helpful additional information)
This unfortunately is necessary due to brain-dead clutter across
various RIR databases, causing mismatches and eventually disruptions.
We will return False in case a network is not suitable for adding
it to our database, and True otherwise.
"""
+ # Check input
+		if not isinstance(network, (ipaddress.IPv4Network, ipaddress.IPv6Network)):
+			raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
- if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
- return False
-
+ # Ignore anything that isn't globally routable
if not network.is_global:
log.debug("Skipping non-globally routable network: %s" % network)
return False
- if network.version == 4:
- if network.prefixlen < 7:
- log.debug("Skipping too big IP chunk: %s" % network)
- return False
-
- if network.prefixlen > 24:
- log.debug("Skipping network too small to be publicly announced: %s" % network)
- return False
-
- if str(network.network_address) == "0.0.0.0":
- log.debug("Skipping network based on 0.0.0.0: %s" % network)
- return False
+		# Ignore any unspecified networks (see RFC 5735 for IPv4 and RFC 2373 for IPv6)
+ elif network.is_unspecified:
+ log.debug("Skipping unspecified network: %s" % network)
+ return False
- elif network.version == 6:
+ # IPv6
+ if network.version == 6:
if network.prefixlen < 10:
log.debug("Skipping too big IP chunk: %s" % network)
return False
- if network.prefixlen > 48:
- log.debug("Skipping network too small to be publicly announced: %s" % network)
- return False
-
- if str(network.network_address) == "::":
- log.debug("Skipping network based on '::': %s" % network)
+ # IPv4
+ elif network.version == 4:
+ if network.prefixlen < 7:
+ log.debug("Skipping too big IP chunk: %s" % network)
return False
- else:
- # This should not happen...
- log.warning("Skipping network of unknown family, this should not happen: %s" % network)
- return False
-
# In case we have made it here, the network is considered to
# be suitable for libloc consumption...
return True
log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
return False
- def _parse_block(self, block, source_key, validcountries = None):
+ def _parse_block(self, block, source_key, countries):
# Get first line to find out what type of block this is
line = block[0]
# inetnum
if line.startswith("inet6num:") or line.startswith("inetnum:"):
- return self._parse_inetnum_block(block, source_key, validcountries)
+ return self._parse_inetnum_block(block, source_key, countries)
# organisation
elif line.startswith("organisation:"):
autnum.get("asn"), autnum.get("org"), source_key,
)
- def _parse_inetnum_block(self, block, source_key, validcountries = None):
- log.debug("Parsing inetnum block:")
-
+ def _parse_inetnum_block(self, block, source_key, countries):
inetnum = {}
for line in block:
- log.debug(line)
-
# Split line
key, val = split_line(line)
inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
- val = val.upper()
+ cc = val.upper()
- # Catch RIR data objects with more than one country code...
- if not key in inetnum:
- inetnum[key] = []
- else:
- if val in inetnum.get("country"):
- # ... but keep this list distinct...
- continue
+ # Ignore certain country codes
+ if cc in IGNORED_COUNTRIES:
+ log.debug("Ignoring country code '%s'" % cc)
+ continue
+
+ # Translate country codes
+ try:
+ cc = TRANSLATED_COUNTRIES[cc]
+ except KeyError:
+ pass
- # When people set country codes to "UK", they actually mean "GB"
- if val == "UK":
- val = "GB"
+ # Do we know this country?
+				if cc not in countries:
+ log.warning("Skipping invalid country code '%s'" % cc)
+ continue
- inetnum[key].append(val)
+ try:
+ inetnum[key].append(cc)
+ except KeyError:
+ inetnum[key] = [cc]
# Parse the geofeed attribute
elif key == "geofeed":
inetnum["geofeed"] = m.group(1)
# Skip empty objects
- if not inetnum or not "country" in inetnum:
+ if not inetnum:
return
- # Prepare skipping objects with unknown country codes...
- invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
-
# Iterate through all networks enumerated from above, check them for plausibility and insert
# them into the database, if _check_parsed_network() succeeded
for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
- if self._check_parsed_network(single_network):
- # Skip objects with unknown country codes if they are valid to avoid log spam...
- if validcountries and invalidcountries:
- log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
- (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
- break
+ if not self._check_parsed_network(single_network):
+ continue
+
+ # Fetch the countries or use a list with an empty country
+			ccs = inetnum.get("country", [None])
- # Everything is fine here, run INSERT statement...
- self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
- "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
+ # Insert the network into the database but only use the first country code
+			for cc in ccs:
+ self.db.execute("""
+ INSERT INTO
+ _rirdata
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+				""", "%s" % single_network, cc, [cc for cc in ccs if cc], source_key,
)
- # Update any geofeed information
- geofeed = inetnum.get("geofeed", None)
- if geofeed:
- self._parse_geofeed(geofeed, single_network)
+				# If there is more than one country, we only use the first one
+ break
- # Delete any previous geofeeds
- else:
- self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
- "%s" % single_network)
+ # Update any geofeed information
+ geofeed = inetnum.get("geofeed", None)
+ if geofeed:
+ self._parse_geofeed(source_key, geofeed, single_network)
- def _parse_geofeed(self, url, single_network):
+ def _parse_geofeed(self, source, url, single_network):
# Parse the URL
url = urllib.parse.urlparse(url)
# Store/update any geofeeds
self.db.execute("""
INSERT INTO
- network_geofeeds(
- network,
- url
- )
- VALUES(
- %s, %s
+ network_geofeeds
+ (
+ network,
+ url,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s
)
ON CONFLICT (network) DO
UPDATE SET url = excluded.url""",
- "%s" % single_network, url,
+ "%s" % single_network, url, source,
)
def _parse_org_block(self, block, source_key):
org.get("organisation"), org.get("org-name"), source_key,
)
- def _parse_line(self, line, source_key, validcountries = None):
+	def _parse_line(self, line, source_key, countries):
# Skip version line
if line.startswith("2"):
return
log.warning("Could not parse line: %s" % line)
return
+ # Skip any unknown protocols
+		if type not in ("ipv6", "ipv4"):
+ log.warning("Unknown IP protocol '%s'" % type)
+ return
+
# Skip any lines that are for stats only or do not have a country
# code at all (avoids log spam below)
if not country_code or country_code == '*':
(country_code, line))
return
- if type in ("ipv6", "ipv4"):
- return self._parse_ip_line(country_code, type, line, source_key)
-
- def _parse_ip_line(self, country, type, line, source_key):
try:
address, prefix, date, status, organization = line.split("|")
except ValueError:
if not self._check_parsed_network(network):
return
- self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
- UPDATE SET country = excluded.country",
- "%s" % network, country, [country], source_key,
+ self.db.execute("""
+ INSERT INTO
+ networks
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+		""", "%s" % network, country_code, [country_code], source_key,
)
- def _import_as_names_from_arin(self, downloader):
- # Delete all previously imported content
- self.db.execute("DELETE FROM autnums WHERE source = %s", "ARIN")
-
- # Try to retrieve the feed from ftp.arin.net
- feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
-
- # Walk through the file
- for line in csv.DictReader(feed, dialect="arin"):
- log.debug("Processing object: %s" % line)
-
- # Fetch status
- status = line.get("Status")
-
- # We are only interested in anything managed by ARIN
- if not status == "Full Registry Services":
- continue
-
- # Fetch organization name
- name = line.get("Org Name")
-
- # Extract ASNs
- first_asn = line.get("Start AS Number")
- last_asn = line.get("End AS Number")
-
- # Cast to a number
- try:
- first_asn = int(first_asn)
- except TypeError as e:
- log.warning("Could not parse ASN '%s'" % first_asn)
- continue
-
- try:
- last_asn = int(last_asn)
- except TypeError as e:
- log.warning("Could not parse ASN '%s'" % last_asn)
- continue
-
- # Check if the range is valid
- if last_asn < first_asn:
- log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
-
- # Insert everything into the database
- for asn in range(first_asn, last_asn + 1):
- if not self._check_parsed_asn(asn):
- log.warning("Skipping invalid ASN %s" % asn)
- continue
-
- self.db.execute("""
- INSERT INTO
- autnums
- (
- number,
- name,
- source
- )
- VALUES
- (
- %s, %s, %s
- )
- ON CONFLICT
- (
- number
- )
- DO NOTHING
- """, asn, name, "ARIN",
- )
-
def handle_update_announcements(self, ns):
server = ns.server[0]
id
""")
- with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
- results = executor.map(self._fetch_geofeed, geofeeds)
-
- # Fetch all results to raise any exceptions
- for result in results:
- pass
+ # Update all geofeeds
+ for geofeed in geofeeds:
+ with self.db.transaction():
+ self._fetch_geofeed(geofeed)
# Delete data from any feeds that did not update in the last two weeks
with self.db.transaction():
with self.db.transaction():
# Open the URL
try:
- req = urllib.request.Request(geofeed.url, headers={
+ # Send the request
+ f = self.downloader.retrieve(geofeed.url, headers={
"User-Agent" : "location/%s" % location.__version__,
# We expect some plain text file in CSV format
- "Accept" : "text/csv, text/plain",
+ "Accept" : "text/csv, text/plain",
})
- # XXX set proxy
+ # Remove any previous data
+ self.db.execute("DELETE FROM geofeed_networks \
+ WHERE geofeed_id = %s", geofeed.id)
- # Send the request
- with urllib.request.urlopen(req, timeout=10) as f:
- # Remove any previous data
- self.db.execute("DELETE FROM geofeed_networks \
- WHERE geofeed_id = %s", geofeed.id)
+ lineno = 0
- lineno = 0
+ # Read the output line by line
+ for line in f:
+ lineno += 1
- # Read the output line by line
- for line in f:
- lineno += 1
+ try:
+ line = line.decode()
- try:
- line = line.decode()
+ # Ignore any lines we cannot decode
+ except UnicodeDecodeError:
+ log.debug("Could not decode line %s in %s" \
+ % (lineno, geofeed.url))
+ continue
- # Ignore any lines we cannot decode
- except UnicodeDecodeError:
- log.debug("Could not decode line %s in %s" \
- % (lineno, geofeed.url))
- continue
+ # Strip any newline
+ line = line.rstrip()
- # Strip any newline
- line = line.rstrip()
+ # Skip empty lines
+ if not line:
+ continue
- # Skip empty lines
- if not line:
- continue
+ # Try to parse the line
+ try:
+ fields = line.split(",", 5)
+ except ValueError:
+ log.debug("Could not parse line: %s" % line)
+ continue
- # Try to parse the line
- try:
- fields = line.split(",", 5)
- except ValueError:
- log.debug("Could not parse line: %s" % line)
- continue
+ # Check if we have enough fields
+ if len(fields) < 4:
+ log.debug("Not enough fields in line: %s" % line)
+ continue
- # Check if we have enough fields
- if len(fields) < 4:
- log.debug("Not enough fields in line: %s" % line)
- continue
+ # Fetch all fields
+				network, country, region, city = fields[:4]
- # Fetch all fields
- network, country, region, city, = fields[:4]
+ # Try to parse the network
+ try:
+ network = ipaddress.ip_network(network, strict=False)
+ except ValueError:
+ log.debug("Could not parse network: %s" % network)
+ continue
- # Try to parse the network
- try:
- network = ipaddress.ip_network(network, strict=False)
- except ValueError:
- log.debug("Could not parse network: %s" % network)
- continue
-
- # Strip any excess whitespace from country codes
- country = country.strip()
-
- # Make the country code uppercase
- country = country.upper()
-
- # Check the country code
- if not country:
- log.debug("Empty country code in Geofeed %s line %s" \
- % (geofeed.url, lineno))
- continue
-
- elif not location.country_code_is_valid(country):
- log.debug("Invalid country code in Geofeed %s:%s: %s" \
- % (geofeed.url, lineno, country))
- continue
-
- # Write this into the database
- self.db.execute("""
- INSERT INTO
- geofeed_networks (
- geofeed_id,
- network,
- country,
- region,
- city
- )
- VALUES (%s, %s, %s, %s, %s)""",
- geofeed.id,
- "%s" % network,
- country,
- region,
- city,
- )
+ # Strip any excess whitespace from country codes
+ country = country.strip()
+
+ # Make the country code uppercase
+ country = country.upper()
+
+ # Check the country code
+ if not country:
+ log.debug("Empty country code in Geofeed %s line %s" \
+ % (geofeed.url, lineno))
+ continue
+
+ elif not location.country_code_is_valid(country):
+ log.debug("Invalid country code in Geofeed %s:%s: %s" \
+ % (geofeed.url, lineno, country))
+ continue
+
+ # Write this into the database
+ self.db.execute("""
+ INSERT INTO
+ geofeed_networks (
+ geofeed_id,
+ network,
+ country,
+ region,
+ city
+ )
+ VALUES (%s, %s, %s, %s, %s)""",
+ geofeed.id,
+ "%s" % network,
+ country,
+ region,
+ city,
+ )
# Catch any HTTP errors
except urllib.request.HTTPError as e:
def handle_update_overrides(self, ns):
with self.db.transaction():
- # Only drop manually created overrides, as we can be reasonably sure to have them,
- # and preserve the rest. If appropriate, it is deleted by correspondent functions.
- self.db.execute("""
- DELETE FROM autnum_overrides WHERE source = 'manual';
- DELETE FROM network_overrides WHERE source = 'manual';
- """)
-
- # Update overrides for various cloud providers big enough to publish their own IP
- # network allocation lists in a machine-readable format...
- self._update_overrides_for_aws()
-
- # Update overrides for Spamhaus DROP feeds...
- self._update_feed_for_spamhaus_drop()
+ # Drop any previous content
+ self.db.execute("TRUNCATE TABLE autnum_overrides")
+ self.db.execute("TRUNCATE TABLE network_overrides")
for file in ns.files:
log.info("Reading %s..." % file)
with open(file, "rb") as f:
- for type, block in location.importer.read_blocks(f):
+ for type, block in read_blocks(f):
if type == "net":
network = block.get("net")
# Try to parse and normalise the network
continue
self.db.execute("""
- INSERT INTO network_overrides(
+ INSERT INTO
+ network_overrides
+ (
network,
country,
- source,
is_anonymous_proxy,
is_satellite_provider,
is_anycast,
is_drop
- ) VALUES (%s, %s, %s, %s, %s, %s, %s)
- ON CONFLICT (network) DO NOTHING""",
+ )
+ VALUES
+ (
+ %s, %s, %s, %s, %s, %s
+ )
+ ON CONFLICT (network) DO NOTHING
+ """,
"%s" % network,
block.get("country"),
- "manual",
self._parse_bool(block, "is-anonymous-proxy"),
self._parse_bool(block, "is-satellite-provider"),
self._parse_bool(block, "is-anycast"),
autnum = autnum[2:]
self.db.execute("""
- INSERT INTO autnum_overrides(
+ INSERT INTO
+ autnum_overrides
+ (
number,
name,
country,
- source,
is_anonymous_proxy,
is_satellite_provider,
is_anycast,
is_drop
- ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
- ON CONFLICT DO NOTHING""",
+ )
+ VALUES
+ (
+ %s, %s, %s, %s, %s, %s, %s
+ )
+ ON CONFLICT (number) DO NOTHING
+ """,
autnum,
block.get("name"),
block.get("country"),
- "manual",
self._parse_bool(block, "is-anonymous-proxy"),
self._parse_bool(block, "is-satellite-provider"),
self._parse_bool(block, "is-anycast"),
else:
log.warning("Unsupported type: %s" % type)
- def _update_overrides_for_aws(self):
- # Download Amazon AWS IP allocation file to create overrides...
- downloader = location.importer.Downloader()
+ def handle_update_feeds(self, ns):
+ """
+ Update any third-party feeds
+ """
+ success = True
- try:
- # Fetch IP ranges
- f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
+ feeds = (
+ # AWS IP Ranges
+ ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
- # Parse downloaded file
- aws_ip_dump = json.load(f)
- except Exception as e:
- log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
- return
+ # Spamhaus DROP
+ ("SPAMHAUS-DROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
+ ("SPAMHAUS-EDROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
+ ("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),
- # At this point, we can assume the downloaded file to be valid
- self.db.execute("""
- DELETE FROM network_feeds WHERE source = 'Amazon AWS IP feed'
- """)
+ # Spamhaus ASNDROP
+ ("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
+ )
+
+ # Drop any data from feeds that we don't support (any more)
+ with self.db.transaction():
+ # Fetch the names of all feeds we support
+ sources = [name for name, *rest in feeds]
+
+ self.db.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources)
+ self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)
+
+ # Walk through all feeds
+ for name, callback, url, *args in feeds:
+ # Skip any feeds that were not requested on the command line
+			if ns.feeds and name not in ns.feeds:
+ continue
+
+ try:
+ self._process_feed(name, callback, url, *args)
+
+ # Log an error but continue if an exception occurs
+ except Exception as e:
+ log.error("Error processing feed '%s': %s" % (name, e))
+ success = False
+
+ # Return status
+ return 0 if success else 1
+
+ def _process_feed(self, name, callback, url, *args):
+ """
+ Processes one feed
+ """
+ # Open the URL
+ f = self.downloader.retrieve(url)
+
+ with self.db.transaction():
+ # Drop any previous content
+ self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
+ self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
+
+ # Call the callback to process the feed
+ return callback(name, f, *args)
- # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
+ def _import_aws_ip_ranges(self, name, f):
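+		"""
+		Imports the Amazon AWS IP ranges feed
+		"""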
+ # Parse the feed
+ feed = json.load(f)
+
+ # Set up a dictionary for mapping a region name to a country. Unfortunately,
# there seems to be no machine-readable version available of this other than
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
# was helpful here as well.
aws_region_country_map = {
- "af-south-1": "ZA",
- "ap-east-1": "HK",
- "ap-south-1": "IN",
- "ap-south-2": "IN",
- "ap-northeast-3": "JP",
- "ap-northeast-2": "KR",
- "ap-southeast-1": "SG",
- "ap-southeast-2": "AU",
- "ap-southeast-3": "MY",
- "ap-southeast-4": "AU",
- "ap-southeast-5": "NZ", # Auckland, NZ
- "ap-southeast-6": "AP", # XXX: Precise location not documented anywhere
- "ap-northeast-1": "JP",
- "ca-central-1": "CA",
- "ca-west-1": "CA",
- "eu-central-1": "DE",
- "eu-central-2": "CH",
- "eu-west-1": "IE",
- "eu-west-2": "GB",
- "eu-south-1": "IT",
- "eu-south-2": "ES",
- "eu-west-3": "FR",
- "eu-north-1": "SE",
- "eusc-de-east-1" : "DE", # XXX: Undocumented, likely located in Berlin rather than Frankfurt
- "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
- "me-central-1": "AE",
- "me-south-1": "BH",
- "sa-east-1": "BR"
- }
+ # Africa
+ "af-south-1" : "ZA",
+
+ # Asia
+ "il-central-1" : "IL", # Tel Aviv
+
+ # Asia/Pacific
+ "ap-northeast-1" : "JP",
+ "ap-northeast-2" : "KR",
+ "ap-northeast-3" : "JP",
+ "ap-east-1" : "HK",
+ "ap-south-1" : "IN",
+ "ap-south-2" : "IN",
+ "ap-southeast-1" : "SG",
+ "ap-southeast-2" : "AU",
+ "ap-southeast-3" : "MY",
+ "ap-southeast-4" : "AU",
+ "ap-southeast-5" : "NZ", # Auckland, NZ
+ "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
+
+ # Canada
+ "ca-central-1" : "CA",
+ "ca-west-1" : "CA",
+
+ # Europe
+ "eu-central-1" : "DE",
+ "eu-central-2" : "CH",
+ "eu-north-1" : "SE",
+ "eu-west-1" : "IE",
+ "eu-west-2" : "GB",
+ "eu-west-3" : "FR",
+ "eu-south-1" : "IT",
+ "eu-south-2" : "ES",
+
+ # Middle East
+ "me-central-1" : "AE",
+ "me-south-1" : "BH",
+
+ # South America
+ "sa-east-1" : "BR",
+
+ # Undocumented, likely located in Berlin rather than Frankfurt
+ "eusc-de-east-1" : "DE",
+ }
+
+ # Collect a list of all networks
+ prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])
+
+ for prefix in prefixes:
+ # Fetch network
+ network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")
+
+ # Parse the network
+ try:
+ network = ipaddress.ip_network(network)
+			except ValueError as e:
+				log.warning("%s: Unable to parse prefix %s: %s" % (name, network, e))
+ continue
- # Fetch all valid country codes to check parsed networks aganist...
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
- validcountries = []
+ # Sanitize parsed networks...
+ if not self._check_parsed_network(network):
+ continue
- for row in rows:
- validcountries.append(row.country_code)
+ # Fetch the region
+ region = prefix.get("region")
- with self.db.transaction():
- for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
- try:
- network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
- except ValueError:
- log.warning("Unable to parse line: %s" % snetwork)
- continue
+ # Set some defaults
+ cc = None
+ is_anycast = False
- # Sanitize parsed networks...
- if not self._check_parsed_network(network):
- continue
+ # Fetch the CC from the dictionary
+ try:
+ cc = aws_region_country_map[region]
- # Determine region of this network...
- region = snetwork["region"]
- cc = None
- is_anycast = False
+ # If we couldn't find anything, let's try something else...
+ except KeyError as e:
+ # Find anycast networks
+ if region == "GLOBAL":
+ is_anycast = True
- # Any region name starting with "us-" will get "US" country code assigned straight away...
- if region.startswith("us-"):
+ # Everything that starts with us- is probably in the United States
+ elif region.startswith("us-"):
cc = "US"
+
+ # Everything that starts with cn- is probably China
elif region.startswith("cn-"):
- # ... same goes for China ...
cc = "CN"
- elif region == "GLOBAL":
- # ... funny region name for anycast-like networks ...
- is_anycast = True
- elif region in aws_region_country_map:
- # ... assign looked up country code otherwise ...
- cc = aws_region_country_map[region]
+
+ # Log a warning for anything else
else:
- # ... and bail out if we are missing something here
- log.warning("Unable to determine country code for line: %s" % snetwork)
+ log.warning("%s: Could not determine country code for AWS region %s" \
+ % (name, region))
continue
- # Skip networks with unknown country codes
- if not is_anycast and validcountries and cc not in validcountries:
- log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
- (cc, network))
- return
-
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO
- network_feeds
- (
- network,
- source,
- country,
- is_anycast
- )
- VALUES
- (
- %s, %s, %s, %s
- )
- ON CONFLICT (network, source) DO NOTHING
- """, "%s" % network, "Amazon AWS IP feed", cc, is_anycast,
+ # Write to database
+ self.db.execute("""
+ INSERT INTO
+ network_feeds
+ (
+ network,
+ source,
+ country,
+ is_anycast
)
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network, source) DO NOTHING
+ """, "%s" % network, name, cc, is_anycast,
+ )
- def _update_feed_for_spamhaus_drop(self):
- downloader = location.importer.Downloader()
-
- ip_lists = [
- ("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
- ("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
- ("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
- ]
-
- asn_lists = [
- ("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json")
- ]
-
- for name, url in ip_lists:
- # Fetch IP list from given URL
- f = downloader.retrieve(url)
+ def _import_spamhaus_drop(self, name, f):
+ """
+ Import Spamhaus DROP IP feeds
+ """
+ # Count all lines
+ lines = 0
- # Split into lines
- fcontent = f.readlines()
+ # Walk through all lines
+ for line in f:
+ # Decode line
+ line = line.decode("utf-8")
- with self.db.transaction():
- # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
- # downloads.
- if len(fcontent) > 10:
- self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
- else:
- log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
- continue
+ # Strip off any comments
+			line, delim, comment = line.partition(";")
- # Iterate through every line, filter comments and add remaining networks to
- # the override table in case they are valid...
- for sline in fcontent:
- # The response is assumed to be encoded in UTF-8...
- sline = sline.decode("utf-8")
+			# Strip any excess whitespace
+			line = line.strip()
- # Comments start with a semicolon...
- if sline.startswith(";"):
- continue
+			# Ignore empty lines
+			if not line:
+				continue
- # Extract network and ignore anything afterwards...
- try:
- network = ipaddress.ip_network(sline.split()[0], strict=False)
- except ValueError:
- log.error("Unable to parse line: %s" % sline)
- continue
+ # Increment line counter
+ lines += 1
- # Sanitize parsed networks...
- if not self._check_parsed_network(network):
- log.warning("Skipping bogus network found in %s (%s): %s" % \
- (name, url, network))
- continue
+ # Parse the network
+ try:
+ network = ipaddress.ip_network(line)
+ except ValueError as e:
+ log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
+ continue
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO
- network_feeds
- (
- network,
- source,
- is_drop
- )
- VALUES
- (
- %s, %s, %s
- )""", "%s" % network, name, True,
- )
+ # Check network
+ if not self._check_parsed_network(network):
+ log.warning("%s: Skipping bogus network: %s" % (name, network))
+ continue
- for name, url in asn_lists:
- # Fetch URL
- f = downloader.retrieve(url)
+ # Insert into the database
+ self.db.execute("""
+ INSERT INTO
+ network_feeds
+ (
+ network,
+ source,
+ is_drop
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % network, name, True,
+ )
- # Split into lines
- fcontent = f.readlines()
+ # Raise an exception if we could not import anything
+ if not lines:
+ raise RuntimeError("Received bogus feed %s with no data" % name)
- with self.db.transaction():
- # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
- # downloads.
- if len(fcontent) > 10:
- self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
- else:
- log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
- continue
+ def _import_spamhaus_asndrop(self, name, f):
+ """
+ Import Spamhaus ASNDROP feed
+ """
+ for line in f:
+ # Decode the line
+ line = line.decode("utf-8")
- # Iterate through every line, filter comments and add remaining ASNs to
- # the override table in case they are valid...
- for sline in fcontent:
- # The response is assumed to be encoded in UTF-8...
- sline = sline.decode("utf-8")
+ # Parse JSON
+ try:
+ line = json.loads(line)
+ except json.JSONDecodeError as e:
+ log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
+ continue
- # Load every line as a JSON object and try to obtain an ASN from it...
- try:
- lineobj = json.loads(sline)
- except json.decoder.JSONDecodeError:
- log.error("Unable to parse line as a JSON object: %s" % sline)
- continue
+ # Fetch type
+ type = line.get("type")
- # Skip line contiaining file metadata
- try:
- type = lineobj["type"]
+ # Skip any metadata
+ if type == "metadata":
+ continue
- if type == "metadata":
- continue
- except KeyError:
- pass
+ # Fetch ASN
+ asn = line.get("asn")
- try:
- asn = lineobj["asn"]
- as_name = lineobj["asname"]
- except KeyError:
- log.warning("Unable to extract necessary information from line: %s" % sline)
- continue
+ # Skip any lines without an ASN
+ if not asn:
+ continue
- # Filter invalid ASNs...
- if not self._check_parsed_asn(asn):
- log.warning("Skipping bogus ASN found in %s (%s): %s" % \
- (name, url, asn))
- continue
+ # Filter invalid ASNs
+ if not self._check_parsed_asn(asn):
+ log.warning("%s: Skipping bogus ASN %s" % (name, asn))
+ continue
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO
- autnum_feeds
- (
- number,
- source,
- is_drop
- )
- VALUES
- (
- %s, %s, %s
- )""", "%s" % asn, name, True,
- )
+ # Write to database
+ self.db.execute("""
+ INSERT INTO
+ autnum_feeds
+ (
+ number,
+ source,
+ is_drop
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % asn, name, True,
+ )
@staticmethod
def _parse_bool(block, key):
# Default to None
return None
- @property
- def countries(self):
- # Fetch all valid country codes to check parsed networks aganist
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
-
- # Return all countries
- return [row.country_code for row in rows]
-
def handle_import_countries(self, ns):
with self.db.transaction():
# Drop all data that we have
return key, val
+def read_blocks(f):
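+	"""
+	Reads blocks from a file and yields them as (type, data) tuples,
+	where the key of the first line determines the type and data maps
+	each attribute key to its value
+	"""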
+ for block in iterate_over_blocks(f):
+ type = None
+ data = {}
+
+ for i, line in enumerate(block):
+ key, value = line.split(":", 1)
+
+ # The key of the first line defines the type
+ if i == 0:
+ type = key
+
+ # Store value
+ data[key] = value.strip()
+
+ yield type, data
+
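+# A rough sketch of the input these helpers expect (assuming an RPSL-style
+# WHOIS dump): objects are separated by blank lines, for example:
+#
+#   inetnum:  192.0.2.0 - 192.0.2.255
+#   country:  DE
+#
+# iterate_over_blocks() yields each object as a list of its lines, and
+# read_blocks() turns it into ("inetnum", {"inetnum": "...", "country": "DE"}).
+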
+def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
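+	"""
+	Reads all blocks from a file and yields them one by one, where a
+	block is a list of consecutive non-empty lines with comments removed
+	"""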
+ block = []
+
+ for line in f:
+ # Skip commented lines
+ if line.startswith(b"#") or line.startswith(b"%"):
+ continue
+
+ # Convert to string
+ for charset in charsets:
+ try:
+ line = line.decode(charset)
+ except UnicodeDecodeError:
+ continue
+ else:
+ break
+
+ # Remove any comments at the end of line
+		line, delim, comment = line.partition("#")
+
+ # Strip any whitespace at the end of the line
+ line = line.rstrip()
+
+ # If we cut off some comment and the line is empty, we can skip it
+ if comment and not line:
+ continue
+
+ # If the line has some content, keep collecting it
+ if line:
+ block.append(line)
+ continue
+
+ # End the block on an empty line
+ if block:
+ yield block
+
+ # Reset the block
+ block = []
+
+ # Return the last block
+ if block:
+ yield block
+
+def iterate_over_lines(f):
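+	"""
+	Iterates over a file line by line, decoding each line and
+	stripping any trailing whitespace
+	"""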
+ for line in f:
+ # Decode the line
+ line = line.decode()
+
+ # Strip the ending
+ yield line.rstrip()
+
def main():
# Run the command line interface
c = CLI()