# #
# libloc - A library to determine the location of someone on the Internet #
# #
-# Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
+# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
# #
# This library is free software; you can redistribute it and/or #
# modify it under the terms of the GNU Lesser General Public #
###############################################################################
import argparse
+import concurrent.futures
+import csv
+import functools
+import http.client
import ipaddress
import json
import logging
import re
import socket
import sys
-import telnetlib
import urllib.error
# Load our location module
(131072, 4199999999),
)
# Country codes that must be rewritten before being imported.
TRANSLATED_COUNTRIES = {
	# When people say UK, they mean GB
	"UK" : "GB",
}

# Country codes that we silently drop from any imported data.
IGNORED_COUNTRIES = set((
	# Formerly Yugoslavia
	"YU",

	# Some people use ZZ to say "no country" or to hide the country
	"ZZ",
))

# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
class CLI(object):
def parse_cli(self):
update_announcements.add_argument("server", nargs=1,
help=_("Route Server to connect to"), metavar=_("SERVER"))
+ # Update geofeeds
+ update_geofeeds = subparsers.add_parser("update-geofeeds",
+ help=_("Update Geofeeds"))
+ update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
+
+ # Update feeds
+ update_feeds = subparsers.add_parser("update-feeds",
+ help=_("Update Feeds"))
+ update_feeds.add_argument("feeds", nargs="*",
+ help=_("Only update these feeds"))
+ update_feeds.set_defaults(func=self.handle_update_feeds)
+
# Update overrides
update_overrides = subparsers.add_parser("update-overrides",
help=_("Update overrides"),
CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
+ -- geofeeds
+ CREATE TABLE IF NOT EXISTS geofeeds(
+ id serial primary key,
+ url text,
+ status integer default null,
+ updated_at timestamp without time zone default null
+ );
+ ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
+ CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
+ ON geofeeds(url);
+ CREATE TABLE IF NOT EXISTS geofeed_networks(
+ geofeed_id integer references geofeeds(id) on delete cascade,
+ network inet,
+ country text,
+ region text,
+ city text
+ );
+ CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
+ ON geofeed_networks(geofeed_id);
+ CREATE INDEX IF NOT EXISTS geofeed_networks_search
+ ON geofeed_networks USING GIST(network inet_ops);
+ CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
+ CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
+ ON network_geofeeds(network);
+ CREATE INDEX IF NOT EXISTS network_geofeeds_search
+ ON network_geofeeds USING GIST(network inet_ops);
+ CREATE INDEX IF NOT EXISTS network_geofeeds_url
+ ON network_geofeeds(url);
+
+ -- feeds
+ CREATE TABLE IF NOT EXISTS autnum_feeds(
+ number bigint NOT NULL,
+ source text NOT NULL,
+ name text,
+ country text,
+ is_anonymous_proxy boolean,
+ is_satellite_provider boolean,
+ is_anycast boolean,
+ is_drop boolean
+ );
+ CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
+ ON autnum_feeds(number, source);
+
+ CREATE TABLE IF NOT EXISTS network_feeds(
+ network inet NOT NULL,
+ source text NOT NULL,
+ country text,
+ is_anonymous_proxy boolean,
+ is_satellite_provider boolean,
+ is_anycast boolean,
+ is_drop boolean
+ );
+ CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
+ ON network_feeds(network, source);
+ CREATE INDEX IF NOT EXISTS network_feeds_search
+ ON network_feeds USING GIST(network inet_ops);
+
-- overrides
CREATE TABLE IF NOT EXISTS autnum_overrides(
number bigint NOT NULL,
);
CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
ON autnum_overrides(number);
- ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+ ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;
CREATE TABLE IF NOT EXISTS network_overrides(
network inet NOT NULL,
ON network_overrides(network);
CREATE INDEX IF NOT EXISTS network_overrides_search
ON network_overrides USING GIST(network inet_ops);
- ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
+ ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;
""")
return db
def fetch_countries(self):
	"""
	Returns the set of all country codes that are currently
	stored in the countries table.
	"""
	# Fetch every known country code so that parsed networks can be
	# validated against them later
	rows = self.db.query("SELECT country_code FROM countries ORDER BY country_code")

	# Collapse the result rows into a plain set of codes
	return {row.country_code for row in rows}
+
def handle_write(self, ns):
"""
Compiles a database in libloc format out of what is in the database
SELECT
autnums.number AS number,
COALESCE(
- (SELECT overrides.name FROM autnum_overrides overrides
- WHERE overrides.number = autnums.number),
+ overrides.name,
autnums.name
) AS name
- FROM autnums
- WHERE name <> %s ORDER BY number
- """, "")
+ FROM
+ autnums
+ LEFT JOIN
+ autnum_overrides overrides ON autnums.number = overrides.number
+ ORDER BY
+ autnums.number
+ """)
for row in rows:
+ # Skip AS without names
+ if not row.name:
+ continue
+
a = writer.add_as(row.number)
a.name = row.name
UNION
SELECT network FROM networks
UNION
+ SELECT network FROM network_feeds
+ UNION
SELECT network FROM network_overrides
+ UNION
+ SELECT network FROM geofeed_networks
),
ordered_networks AS (
-- Country
COALESCE(
(
- SELECT country FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ country
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
),
(
- SELECT country FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ country
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
+ ),
+ (
+ SELECT
+ country
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ country
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
+ ),
+ (
+ SELECT
+ geofeed_networks.country AS country
+ FROM
+ network_geofeeds
+
+ -- Join the data from the geofeeds
+ LEFT JOIN
+ geofeeds ON network_geofeeds.url = geofeeds.url
+ LEFT JOIN
+ geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id
+
+ -- Check whether we have a geofeed for this network
+ WHERE
+ networks.network <<= network_geofeeds.network
+ AND
+ networks.network <<= geofeed_networks.network
+
+ -- Filter for the best result
+ ORDER BY
+ masklen(geofeed_networks.network) DESC
+ LIMIT 1
),
networks.country
) AS country,
-- Flags
COALESCE(
(
- SELECT is_anonymous_proxy FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_anonymous_proxy
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
),
(
- SELECT is_anonymous_proxy FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_anonymous_proxy
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anonymous_proxy
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anonymous_proxy
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_anonymous_proxy,
COALESCE(
(
- SELECT is_satellite_provider FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_satellite_provider
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
),
(
- SELECT is_satellite_provider FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_satellite_provider
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_satellite_provider
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_satellite_provider
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_satellite_provider,
COALESCE(
(
- SELECT is_anycast FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_anycast
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anycast
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anycast
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
),
(
- SELECT is_anycast FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_anycast
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_anycast,
COALESCE(
(
- SELECT is_drop FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_drop
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
),
(
- SELECT is_drop FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_drop
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_drop
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_drop
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_drop
# Did we run successfully?
error = False
- # Fetch all valid country codes to check parsed networks aganist
- validcountries = self.countries
+ # Fetch all valid country codes to check parsed networks against
+ countries = self.fetch_countries()
+
+ # Check if we have countries
+ if not countries:
+ log.error("Please import countries before importing any WHOIS data")
+ return 1
# Iterate over all potential sources
for source in sorted(location.importer.SOURCES):
name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
+ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
original_countries text[] NOT NULL, source text NOT NULL)
ON COMMIT DROP;
CREATE INDEX _rirdata_search ON _rirdata
""")
# Remove all previously imported content
+ self.db.execute("DELETE FROM autnums WHERE source = %s", source)
self.db.execute("DELETE FROM networks WHERE source = %s", source)
try:
# Fetch WHOIS sources
for url in location.importer.WHOIS_SOURCES.get(source, []):
for block in downloader.request_blocks(url):
- self._parse_block(block, source, validcountries)
+ self._parse_block(block, source, countries)
# Fetch extended sources
for url in location.importer.EXTENDED_SOURCES.get(source, []):
for line in downloader.request_lines(url):
- self._parse_line(line, source, validcountries)
+ self._parse_line(line, source, countries)
except urllib.error.URLError as e:
log.error("Could not retrieve data from %s: %s" % (source, e))
error = True
# Process all parsed networks from every RIR we happen to have access to,
# insert the largest network chunks into the networks table immediately...
- families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata \
- ORDER BY family(network)")
+ families = self.db.query("""
+ SELECT DISTINCT
+ family(network) AS family
+ FROM
+ _rirdata
+ ORDER BY
+ family(network)
+ """,
+ )
for family in (row.family for row in families):
# Fetch the smallest mask length in our data set
FROM
_rirdata
WHERE
- family(network) = %s""",
- family,
+ family(network) = %s
+ """, family,
)
# Copy all networks
family(network) = %s
ORDER BY
masklen(network) ASC
- OFFSET 1""",
- family,
+ OFFSET 1
+ """, family,
)
# ... and insert networks with this prefix in case they provide additional
parent_country IS NULL
OR
country <> parent_country
- ON CONFLICT DO NOTHING""",
- family, prefix,
+ ON CONFLICT DO NOTHING
+ """, family, prefix,
)
self.db.execute("""
- INSERT INTO autnums(number, name, source)
- SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
- JOIN _organizations ON _autnums.organization = _organizations.handle
- ON CONFLICT (number) DO UPDATE SET name = excluded.name;
- """)
+ INSERT INTO
+ autnums
+ (
+ number,
+ name,
+ source
+ )
+ SELECT
+ _autnums.number,
+ _organizations.name,
+ _organizations.source
+ FROM
+ _autnums
+ JOIN
+ _organizations ON _autnums.organization = _organizations.handle
+ ON CONFLICT
+ (
+ number
+ )
+ DO UPDATE
+ SET name = excluded.name
+ """,
+ )
# Download and import (technical) AS names from ARIN
with self.db.transaction():
- self._import_as_names_from_arin()
+ self._import_as_names_from_arin(downloader)
# Return a non-zero exit code for errors
return 1 if error else 0
We will return False in case a network is not suitable for adding
it to our database, and True otherwise.
"""
+ # Check input
+ if isinstance(network, ipaddress.IPv6Network):
+ pass
+ elif isinstance(network, ipaddress.IPv4Network):
+ pass
+ else:
+ raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
- if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
- return False
-
+ # Ignore anything that isn't globally routable
if not network.is_global:
log.debug("Skipping non-globally routable network: %s" % network)
return False
+ # Ignore anything that is unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
+ elif network.is_unspecified:
+ log.debug("Skipping unspecified network: %s" % network)
+ return False
+
if network.version == 4:
if network.prefixlen < 7:
log.debug("Skipping too big IP chunk: %s" % network)
log.debug("Skipping network too small to be publicly announced: %s" % network)
return False
- if str(network.network_address) == "0.0.0.0":
- log.debug("Skipping network based on 0.0.0.0: %s" % network)
- return False
-
elif network.version == 6:
if network.prefixlen < 10:
log.debug("Skipping too big IP chunk: %s" % network)
log.debug("Skipping network too small to be publicly announced: %s" % network)
return False
- if str(network.network_address) == "::":
- log.debug("Skipping network based on '::': %s" % network)
- return False
-
- else:
- # This should not happen...
- log.warning("Skipping network of unknown family, this should not happen: %s" % network)
- return False
-
# In case we have made it here, the network is considered to
# be suitable for libloc consumption...
return True
log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
return False
- def _parse_block(self, block, source_key, validcountries = None):
+ def _parse_block(self, block, source_key, countries):
# Get first line to find out what type of block this is
line = block[0]
# inetnum
if line.startswith("inet6num:") or line.startswith("inetnum:"):
- return self._parse_inetnum_block(block, source_key, validcountries)
+ return self._parse_inetnum_block(block, source_key, countries)
# organisation
elif line.startswith("organisation:"):
autnum.get("asn"), autnum.get("org"), source_key,
)
- def _parse_inetnum_block(self, block, source_key, validcountries = None):
+ def _parse_inetnum_block(self, block, source_key, countries):
log.debug("Parsing inetnum block:")
inetnum = {}
inetnum[key] = [ipaddress.ip_network(val, strict=False)]
elif key == "country":
- val = val.upper()
+ cc = val.upper()
- # Catch RIR data objects with more than one country code...
- if not key in inetnum:
- inetnum[key] = []
- else:
- if val in inetnum.get("country"):
- # ... but keep this list distinct...
- continue
+ # Ignore certain country codes
+ if cc in IGNORED_COUNTRIES:
+ log.debug("Ignoring country code '%s'" % cc)
+ continue
- # When people set country codes to "UK", they actually mean "GB"
- if val == "UK":
- val = "GB"
+ # Translate country codes
+ try:
+ cc = TRANSLATED_COUNTRIES[cc]
+ except KeyError:
+ pass
- inetnum[key].append(val)
+ # Do we know this country?
+ if not cc in countries:
+ log.warning("Skipping invalid country code '%s'" % cc)
+ continue
+
+ try:
+ inetnum[key].append(cc)
+ except KeyError:
+ inetnum[key] = [cc]
+
+ # Parse the geofeed attribute
+ elif key == "geofeed":
+ inetnum["geofeed"] = val
+
+ # Parse geofeed when used as a remark
+ elif key == "remarks":
+ m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
+ if m:
+ inetnum["geofeed"] = m.group(1)
# Skip empty objects
- if not inetnum or not "country" in inetnum:
+ if not inetnum:
return
- # Prepare skipping objects with unknown country codes...
- invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
-
# Iterate through all networks enumerated from above, check them for plausibility and insert
# them into the database, if _check_parsed_network() succeeded
for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
- if self._check_parsed_network(single_network):
+ if not self._check_parsed_network(single_network):
+ continue
- # Skip objects with unknown country codes if they are valid to avoid log spam...
- if validcountries and invalidcountries:
- log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
- (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
- break
+ # Fetch the countries or use a list with an empty country
+ countries = inetnum.get("country", [None])
+
+ # Insert the network into the database but only use the first country code
+ for cc in countries:
+ self.db.execute("""
+ INSERT INTO
+ _rirdata
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+ """, "%s" % single_network, cc, [cc for cc in countries if cc], source_key,
+ )
+
+ # If there are more than one country, we will only use the first one
+ break
+
+ # Update any geofeed information
+ geofeed = inetnum.get("geofeed", None)
+ if geofeed:
+ self._parse_geofeed(geofeed, single_network)
+
+ # Delete any previous geofeeds
+ else:
+ self.db.execute(
+ "DELETE FROM network_geofeeds WHERE network = %s", "%s" % single_network,
+ )
- # Everything is fine here, run INSERT statement...
- self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
- "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
+ def _parse_geofeed(self, url, single_network):
+ # Parse the URL
+ url = urllib.parse.urlparse(url)
+
+ # Make sure that this is a HTTPS URL
+ if not url.scheme == "https":
+ log.debug("Geofeed URL is not using HTTPS: %s" % geofeed)
+ return
+
+ # Put the URL back together normalized
+ url = url.geturl()
+
+ # Store/update any geofeeds
+ self.db.execute("""
+ INSERT INTO
+ network_geofeeds(
+ network,
+ url
)
+ VALUES(
+ %s, %s
+ )
+ ON CONFLICT (network) DO
+ UPDATE SET url = excluded.url""",
+ "%s" % single_network, url,
+ )
def _parse_org_block(self, block, source_key):
org = {}
org.get("organisation"), org.get("org-name"), source_key,
)
- def _parse_line(self, line, source_key, validcountries = None):
+ def _parse_line(self, line, source_key, validcountries=None):
# Skip version line
if line.startswith("2"):
return
log.warning("Could not parse line: %s" % line)
return
+ # Skip any unknown protocols
+ if not type in ("ipv6", "ipv4"):
+ log.warning("Unknown IP protocol '%s'" % type)
+ return
+
# Skip any lines that are for stats only or do not have a country
# code at all (avoids log spam below)
if not country_code or country_code == '*':
(country_code, line))
return
- if type in ("ipv6", "ipv4"):
- return self._parse_ip_line(country_code, type, line, source_key)
-
- def _parse_ip_line(self, country, type, line, source_key):
try:
address, prefix, date, status, organization = line.split("|")
except ValueError:
if not self._check_parsed_network(network):
return
- self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
- UPDATE SET country = excluded.country",
- "%s" % network, country, [country], source_key,
+ self.db.execute("""
+ INSERT INTO
+ networks
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network)
+ DO UPDATE SET country = excluded.country
+ """, "%s" % network, country_code, [country], source_key,
)
def _import_as_names_from_arin(self, downloader):
	"""
	Imports (technical) AS names from ARIN's registry service feed.

	downloader -- the shared Downloader instance used to fetch the feed
	"""
	# Delete all previously imported content
	self.db.execute("DELETE FROM autnums WHERE source = %s", "ARIN")

	# Try to retrieve the feed from ftp.arin.net
	feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")

	# Walk through the file
	for line in csv.DictReader(feed, dialect="arin"):
		log.debug("Processing object: %s" % line)

		# Fetch status
		status = line.get("Status")

		# We are only interested in anything managed by ARIN
		if not status == "Full Registry Services":
			continue

		# Fetch organization name
		name = line.get("Org Name")

		# Extract ASNs
		first_asn = line.get("Start AS Number")
		last_asn = line.get("End AS Number")

		# Cast to a number
		# BUG FIX: int() raises TypeError for None but ValueError for a
		# malformed string - both must be caught or a bad row crashes the run
		try:
			first_asn = int(first_asn)
		except (TypeError, ValueError):
			log.warning("Could not parse ASN '%s'" % first_asn)
			continue

		try:
			last_asn = int(last_asn)
		except (TypeError, ValueError):
			log.warning("Could not parse ASN '%s'" % last_asn)
			continue

		# Check if the range is valid
		# BUG FIX: previously fell through after the warning; skip explicitly
		if last_asn < first_asn:
			log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
			continue

		# Insert everything into the database
		for asn in range(first_asn, last_asn + 1):
			if not self._check_parsed_asn(asn):
				log.warning("Skipping invalid ASN %s" % asn)
				continue

			self.db.execute("""
				INSERT INTO
					autnums
					(
						number,
						name,
						source
					)
				VALUES
					(
						%s, %s, %s
					)
				ON CONFLICT
					(
						number
					)
				DO NOTHING
				""", asn, name, "ARIN",
			)
def handle_update_announcements(self, ns):
server = ns.server[0]
with self.db.transaction():
if server.startswith("/"):
self._handle_update_announcements_from_bird(server)
- else:
- self._handle_update_announcements_from_telnet(server)
# Purge anything we never want here
self.db.execute("""
# We don't need to process any more
break
- def _handle_update_announcements_from_telnet(self, server):
- # Pre-compile regular expression for routes
- route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
-
- with telnetlib.Telnet(server) as t:
- # Enable debug mode
- #if ns.debug:
- # t.set_debuglevel(10)
-
- # Wait for console greeting
- greeting = t.read_until(b"> ", timeout=30)
- if not greeting:
- log.error("Could not get a console prompt")
- return 1
-
- # Disable pagination
- t.write(b"terminal length 0\n")
-
- # Wait for the prompt to return
- t.read_until(b"> ")
-
- # Fetch the routing tables
- for protocol in ("ipv6", "ipv4"):
- log.info("Requesting %s routing table" % protocol)
-
- # Request the full unicast routing table
- t.write(b"show bgp %s unicast\n" % protocol.encode())
-
- # Read entire header which ends with "Path"
- t.read_until(b"Path\r\n")
-
- while True:
- # Try reading a full entry
- # Those might be broken across multiple lines but ends with i
- line = t.read_until(b"i\r\n", timeout=5)
- if not line:
- break
-
- # Show line for debugging
- #log.debug(repr(line))
-
- # Try finding a route in here
- m = route.match(line)
- if m:
- network, autnum = m.groups()
-
- # Convert network to string
- network = network.decode()
-
- # Append /24 for IPv4 addresses
- if not "/" in network and not ":" in network:
- network = "%s/24" % network
-
- # Convert AS number to integer
- autnum = int(autnum)
-
- log.info("Found announcement for %s by %s" % (network, autnum))
-
- self.db.execute("INSERT INTO announcements(network, autnum) \
- VALUES(%s, %s) ON CONFLICT (network) DO \
- UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
- network, autnum,
- )
-
- log.info("Finished reading the %s routing table" % protocol)
-
def _bird_cmd(self, socket_path, command):
# Connect to the socket
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
# Otherwise return the line
yield line
- def handle_update_overrides(self, ns):
+ def handle_update_geofeeds(self, ns):
+ # Sync geofeeds
with self.db.transaction():
- # Only drop manually created overrides, as we can be reasonably sure to have them,
- # and preserve the rest. If appropriate, it is deleted by correspondent functions.
+ # Delete all geofeeds which are no longer linked
self.db.execute("""
- DELETE FROM autnum_overrides WHERE source = 'manual';
- DELETE FROM network_overrides WHERE source = 'manual';
+ DELETE FROM
+ geofeeds
+ WHERE
+ NOT EXISTS (
+ SELECT
+ 1
+ FROM
+ network_geofeeds
+ WHERE
+ geofeeds.url = network_geofeeds.url
+ )""",
+ )
+
+ # Copy all geofeeds
+ self.db.execute("""
+ INSERT INTO
+ geofeeds(
+ url
+ )
+ SELECT
+ url
+ FROM
+ network_geofeeds
+ ON CONFLICT (url)
+ DO NOTHING
+ """,
+ )
+
+ # Fetch all Geofeeds that require an update
+ geofeeds = self.db.query("""
+ SELECT
+ id,
+ url
+ FROM
+ geofeeds
+ WHERE
+ updated_at IS NULL
+ OR
+ updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
+ ORDER BY
+ id
+ """)
+
+ # Create a downloader
+ downloader = location.importer.Downloader()
+
+ # Pass the downloader to the fetch_geofeed function
+ fetch_geofeed = functools.partial(self._fetch_geofeed, downloader)
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+ results = executor.map(fetch_geofeed, geofeeds)
+
+ # Fetch all results to raise any exceptions
+ for result in results:
+ pass
+
+ # Delete data from any feeds that did not update in the last two weeks
+ with self.db.transaction():
+ self.db.execute("""
+ DELETE FROM
+ geofeed_networks
+ WHERE
+ geofeed_networks.geofeed_id IN (
+ SELECT
+ geofeeds.id
+ FROM
+ geofeeds
+ WHERE
+ updated_at IS NULL
+ OR
+ updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
+ )
""")
- # Update overrides for various cloud providers big enough to publish their own IP
- # network allocation lists in a machine-readable format...
- self._update_overrides_for_aws()
+ def _fetch_geofeed(self, downloader, geofeed):
+ log.debug("Fetching Geofeed %s" % geofeed.url)
+
+ with self.db.transaction():
+ # Open the URL
+ try:
+ # Send the request
+ f = downloader.retrieve(geofeed.url, headers={
+ "User-Agent" : "location/%s" % location.__version__,
+
+ # We expect some plain text file in CSV format
+ "Accept" : "text/csv, text/plain",
+ })
+
+ # Remove any previous data
+ self.db.execute("DELETE FROM geofeed_networks \
+ WHERE geofeed_id = %s", geofeed.id)
+
+ lineno = 0
+
+ # Read the output line by line
+ for line in f:
+ lineno += 1
+
+ try:
+ line = line.decode()
+
+ # Ignore any lines we cannot decode
+ except UnicodeDecodeError:
+ log.debug("Could not decode line %s in %s" \
+ % (lineno, geofeed.url))
+ continue
+
+ # Strip any newline
+ line = line.rstrip()
+
+ # Skip empty lines
+ if not line:
+ continue
+
+ # Try to parse the line
+ try:
+ fields = line.split(",", 5)
+ except ValueError:
+ log.debug("Could not parse line: %s" % line)
+ continue
+
+ # Check if we have enough fields
+ if len(fields) < 4:
+ log.debug("Not enough fields in line: %s" % line)
+ continue
+
+ # Fetch all fields
+ network, country, region, city, = fields[:4]
+
+ # Try to parse the network
+ try:
+ network = ipaddress.ip_network(network, strict=False)
+ except ValueError:
+ log.debug("Could not parse network: %s" % network)
+ continue
+
+ # Strip any excess whitespace from country codes
+ country = country.strip()
+
+ # Make the country code uppercase
+ country = country.upper()
+
+ # Check the country code
+ if not country:
+ log.debug("Empty country code in Geofeed %s line %s" \
+ % (geofeed.url, lineno))
+ continue
+
+ elif not location.country_code_is_valid(country):
+ log.debug("Invalid country code in Geofeed %s:%s: %s" \
+ % (geofeed.url, lineno, country))
+ continue
+
+ # Write this into the database
+ self.db.execute("""
+ INSERT INTO
+ geofeed_networks (
+ geofeed_id,
+ network,
+ country,
+ region,
+ city
+ )
+ VALUES (%s, %s, %s, %s, %s)""",
+ geofeed.id,
+ "%s" % network,
+ country,
+ region,
+ city,
+ )
+
+ # Catch any HTTP errors
+		except urllib.error.HTTPError as e:
+ self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
+ WHERE id = %s", e.code, "%s" % e, geofeed.id)
+
+ # Remove any previous data when the feed has been deleted
+ if e.code == 404:
+ self.db.execute("DELETE FROM geofeed_networks \
+ WHERE geofeed_id = %s", geofeed.id)
+
+ # Catch any other errors and connection timeouts
+		except (http.client.InvalidURL, urllib.error.URLError, TimeoutError) as e:
+ log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))
+
+ self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
+ WHERE id = %s", 599, "%s" % e, geofeed.id)
- # Update overrides for Spamhaus DROP feeds...
- self._update_overrides_for_spamhaus_drop()
+ # Mark the geofeed as updated
+ else:
+ self.db.execute("""
+ UPDATE
+ geofeeds
+ SET
+ updated_at = CURRENT_TIMESTAMP,
+ status = NULL,
+ error = NULL
+ WHERE
+ id = %s""",
+ geofeed.id,
+ )
+
+ def handle_update_overrides(self, ns):
+ with self.db.transaction():
+ # Drop any previous content
+ self.db.execute("TRUNCATE TABLE autnum_overrides")
+ self.db.execute("TRUNCATE TABLE network_overrides")
for file in ns.files:
log.info("Reading %s..." % file)
continue
self.db.execute("""
- INSERT INTO network_overrides(
+ INSERT INTO
+ network_overrides
+ (
network,
country,
- source,
is_anonymous_proxy,
is_satellite_provider,
is_anycast,
is_drop
- ) VALUES (%s, %s, %s, %s, %s, %s, %s)
- ON CONFLICT (network) DO NOTHING""",
+ )
+ VALUES
+ (
+ %s, %s, %s, %s, %s, %s
+ )
+ ON CONFLICT (network) DO NOTHING
+ """,
"%s" % network,
block.get("country"),
- "manual",
self._parse_bool(block, "is-anonymous-proxy"),
self._parse_bool(block, "is-satellite-provider"),
self._parse_bool(block, "is-anycast"),
autnum = autnum[2:]
self.db.execute("""
- INSERT INTO autnum_overrides(
+ INSERT INTO
+ autnum_overrides
+ (
number,
name,
country,
- source,
is_anonymous_proxy,
is_satellite_provider,
is_anycast,
is_drop
- ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
- ON CONFLICT DO NOTHING""",
+ )
+ VALUES
+ (
+ %s, %s, %s, %s, %s, %s, %s
+ )
+ ON CONFLICT (number) DO NOTHING
+ """,
autnum,
block.get("name"),
block.get("country"),
- "manual",
self._parse_bool(block, "is-anonymous-proxy"),
self._parse_bool(block, "is-satellite-provider"),
self._parse_bool(block, "is-anycast"),
else:
log.warning("Unsupported type: %s" % type)
- def _update_overrides_for_aws(self):
- # Download Amazon AWS IP allocation file to create overrides...
+ def handle_update_feeds(self, ns):
+ """
+ Update any third-party feeds
+ """
+ success = True
+
+ # Create a downloader
downloader = location.importer.Downloader()
- try:
- # Fetch IP ranges
- f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
+ feeds = (
+ # AWS IP Ranges
+ ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
- # Parse downloaded file
- aws_ip_dump = json.load(f)
- except Exception as e:
- log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
- return
+ # Spamhaus DROP
+ ("SPAMHAUS-DROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
+ ("SPAMHAUS-EDROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
+ ("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),
- # At this point, we can assume the downloaded file to be valid
- self.db.execute("""
- DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
- """)
+ # Spamhaus ASNDROP
+ ("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
+ )
+
+ # Drop any data from feeds that we don't support (any more)
+ with self.db.transaction():
+ # Fetch the names of all feeds we support
+ sources = [name for name, *rest in feeds]
+
+ self.db.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources)
+ self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)
+
+ # Walk through all feeds
+ for name, callback, url, *args in feeds:
+ # Skip any feeds that were not requested on the command line
+ if ns.feeds and not name in ns.feeds:
+ continue
+
+ try:
+ self._process_feed(downloader, name, callback, url, *args)
+
+ # Log an error but continue if an exception occurs
+ except Exception as e:
+ log.error("Error processing feed '%s': %s" % (name, e))
+ success = False
+
+ # Return status
+ return 0 if success else 1
+
+ def _process_feed(self, downloader, name, callback, url, *args):
+ """
+ Processes one feed
+ """
+ # Open the URL
+ f = downloader.retrieve(url)
+
+ with self.db.transaction():
+ # Drop any previous content
+ self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
+ self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
- # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
+ # Call the callback to process the feed
+ return callback(name, f, *args)
+
+ def _import_aws_ip_ranges(self, name, f):
+ # Parse the feed
+ feed = json.load(f)
+
+ # Set up a dictionary for mapping a region name to a country. Unfortunately,
# there seems to be no machine-readable version available of this other than
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
# was helpful here as well.
aws_region_country_map = {
- "af-south-1": "ZA",
- "ap-east-1": "HK",
- "ap-south-1": "IN",
- "ap-south-2": "IN",
- "ap-northeast-3": "JP",
- "ap-northeast-2": "KR",
- "ap-southeast-1": "SG",
- "ap-southeast-2": "AU",
- "ap-southeast-3": "MY",
- "ap-southeast-4": "AU",
- "ap-southeast-6": "AP", # XXX: Precise location not documented anywhere
- "ap-northeast-1": "JP",
- "ca-central-1": "CA",
- "ca-west-1": "CA",
- "eu-central-1": "DE",
- "eu-central-2": "CH",
- "eu-west-1": "IE",
- "eu-west-2": "GB",
- "eu-south-1": "IT",
- "eu-south-2": "ES",
- "eu-west-3": "FR",
- "eu-north-1": "SE",
- "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
- "me-central-1": "AE",
- "me-south-1": "BH",
- "sa-east-1": "BR"
- }
+ # Africa
+ "af-south-1" : "ZA",
+
+ # Asia
+ "il-central-1" : "IL", # Tel Aviv
+
+ # Asia/Pacific
+ "ap-northeast-1" : "JP",
+ "ap-northeast-2" : "KR",
+ "ap-northeast-3" : "JP",
+ "ap-east-1" : "HK",
+ "ap-south-1" : "IN",
+ "ap-south-2" : "IN",
+ "ap-southeast-1" : "SG",
+ "ap-southeast-2" : "AU",
+			"ap-southeast-3" : "ID", # Jakarta
+ "ap-southeast-4" : "AU",
+			"ap-southeast-5" : "MY", # Kuala Lumpur
+ "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
+
+ # Canada
+ "ca-central-1" : "CA",
+ "ca-west-1" : "CA",
+
+ # Europe
+ "eu-central-1" : "DE",
+ "eu-central-2" : "CH",
+ "eu-north-1" : "SE",
+ "eu-west-1" : "IE",
+ "eu-west-2" : "GB",
+ "eu-west-3" : "FR",
+ "eu-south-1" : "IT",
+ "eu-south-2" : "ES",
+
+ # Middle East
+ "me-central-1" : "AE",
+ "me-south-1" : "BH",
+
+ # South America
+ "sa-east-1" : "BR",
+
+ # Undocumented, likely located in Berlin rather than Frankfurt
+ "eusc-de-east-1" : "DE",
+ }
+
+ # Collect a list of all networks
+ prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])
+
+ for prefix in prefixes:
+ # Fetch network
+ network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")
+
+ # Parse the network
+ try:
+ network = ipaddress.ip_network(network)
+			except ValueError as e:
+ log.warning("%s: Unable to parse prefix %s" % (name, network))
+ continue
- # Fetch all valid country codes to check parsed networks aganist...
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
- validcountries = []
+ # Sanitize parsed networks...
+ if not self._check_parsed_network(network):
+ continue
- for row in rows:
- validcountries.append(row.country_code)
+ # Fetch the region
+ region = prefix.get("region")
- with self.db.transaction():
- for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
- try:
- network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
- except ValueError:
- log.warning("Unable to parse line: %s" % snetwork)
- continue
+ # Set some defaults
+ cc = None
+ is_anycast = False
- # Sanitize parsed networks...
- if not self._check_parsed_network(network):
- continue
+ # Fetch the CC from the dictionary
+ try:
+ cc = aws_region_country_map[region]
- # Determine region of this network...
- region = snetwork["region"]
- cc = None
- is_anycast = False
+ # If we couldn't find anything, let's try something else...
+ except KeyError as e:
+ # Find anycast networks
+ if region == "GLOBAL":
+ is_anycast = True
- # Any region name starting with "us-" will get "US" country code assigned straight away...
- if region.startswith("us-"):
+ # Everything that starts with us- is probably in the United States
+ elif region.startswith("us-"):
cc = "US"
+
+ # Everything that starts with cn- is probably China
elif region.startswith("cn-"):
- # ... same goes for China ...
cc = "CN"
- elif region == "GLOBAL":
- # ... funny region name for anycast-like networks ...
- is_anycast = True
- elif region in aws_region_country_map:
- # ... assign looked up country code otherwise ...
- cc = aws_region_country_map[region]
+
+ # Log a warning for anything else
else:
- # ... and bail out if we are missing something here
- log.warning("Unable to determine country code for line: %s" % snetwork)
+ log.warning("%s: Could not determine country code for AWS region %s" \
+ % (name, region))
continue
- # Skip networks with unknown country codes
- if not is_anycast and validcountries and cc not in validcountries:
- log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
- (cc, network))
- return
-
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO network_overrides(
- network,
- country,
- source,
- is_anonymous_proxy,
- is_satellite_provider,
- is_anycast
- ) VALUES (%s, %s, %s, %s, %s, %s)
- ON CONFLICT (network) DO NOTHING""",
- "%s" % network,
- cc,
- "Amazon AWS IP feed",
- None,
- None,
- is_anycast,
+ # Write to database
+ self.db.execute("""
+ INSERT INTO
+ network_feeds
+ (
+ network,
+ source,
+ country,
+ is_anycast
)
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network, source) DO NOTHING
+ """, "%s" % network, name, cc, is_anycast,
+ )
+ def _import_spamhaus_drop(self, name, f):
+ """
+ Import Spamhaus DROP IP feeds
+ """
+ # Count all lines
+ lines = 0
- def _update_overrides_for_spamhaus_drop(self):
- downloader = location.importer.Downloader()
-
- ip_lists = [
- ("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
- ("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
- ("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
- ]
-
- asn_lists = [
- ("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.txt")
- ]
-
- for name, url in ip_lists:
- # Fetch IP list from given URL
- f = downloader.retrieve(url)
+ # Walk through all lines
+ for line in f:
+ # Decode line
+ line = line.decode("utf-8")
- # Split into lines
- fcontent = f.readlines()
+ # Strip off any comments
+ line, _, comment = line.partition(";")
- with self.db.transaction():
- # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
- # downloads.
- if len(fcontent) > 10:
- self.db.execute("""
- DELETE FROM network_overrides WHERE source = '%s';
- """ % name,
- )
- else:
- log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
- continue
+ # Ignore empty lines
+ if not line:
+ continue
- # Iterate through every line, filter comments and add remaining networks to
- # the override table in case they are valid...
- for sline in fcontent:
- # The response is assumed to be encoded in UTF-8...
- sline = sline.decode("utf-8")
+ # Strip any excess whitespace
+ line = line.strip()
- # Comments start with a semicolon...
- if sline.startswith(";"):
- continue
+ # Increment line counter
+ lines += 1
- # Extract network and ignore anything afterwards...
- try:
- network = ipaddress.ip_network(sline.split()[0], strict=False)
- except ValueError:
- log.error("Unable to parse line: %s" % sline)
- continue
-
- # Sanitize parsed networks...
- if not self._check_parsed_network(network):
- log.warning("Skipping bogus network found in %s (%s): %s" % \
- (name, url, network))
- continue
+ # Parse the network
+ try:
+ network = ipaddress.ip_network(line)
+ except ValueError as e:
+ log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
+ continue
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO network_overrides(
- network,
- source,
- is_drop
- ) VALUES (%s, %s, %s)
- ON CONFLICT (network) DO UPDATE SET is_drop = True""",
- "%s" % network,
- name,
- True
- )
+ # Check network
+ if not self._check_parsed_network(network):
+ log.warning("%s: Skipping bogus network: %s" % (name, network))
+ continue
- for name, url in asn_lists:
- # Fetch URL
- f = downloader.retrieve(url)
+ # Insert into the database
+ self.db.execute("""
+ INSERT INTO
+ network_feeds
+ (
+ network,
+ source,
+ is_drop
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % network, name, True,
+ )
- # Split into lines
- fcontent = f.readlines()
+ # Raise an exception if we could not import anything
+ if not lines:
+ raise RuntimeError("Received bogus feed %s with no data" % name)
- with self.db.transaction():
- # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
- # downloads.
- if len(fcontent) > 10:
- self.db.execute("""
- DELETE FROM autnum_overrides WHERE source = '%s';
- """ % name,
- )
- else:
- log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
- continue
+ def _import_spamhaus_asndrop(self, name, f):
+ """
+ Import Spamhaus ASNDROP feed
+ """
+ for line in f:
+ # Decode the line
+ line = line.decode("utf-8")
- # Iterate through every line, filter comments and add remaining ASNs to
- # the override table in case they are valid...
- for sline in f.readlines():
- # The response is assumed to be encoded in UTF-8...
- sline = sline.decode("utf-8")
+ # Parse JSON
+ try:
+ line = json.loads(line)
+ except json.JSONDecodeError as e:
+ log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
+ continue
- # Comments start with a semicolon...
- if sline.startswith(";"):
- continue
+ # Fetch type
+ type = line.get("type")
- # Throw away anything after the first space...
- sline = sline.split()[0]
+ # Skip any metadata
+ if type == "metadata":
+ continue
- # ... strip the "AS" prefix from it ...
- sline = sline.strip("AS")
+ # Fetch ASN
+ asn = line.get("asn")
- # ... and convert it into an integer. Voila.
- asn = int(sline)
+ # Skip any lines without an ASN
+ if not asn:
+ continue
- # Filter invalid ASNs...
- if not self._check_parsed_asn(asn):
- log.warning("Skipping bogus ASN found in %s (%s): %s" % \
- (name, url, asn))
- continue
+ # Filter invalid ASNs
+ if not self._check_parsed_asn(asn):
+ log.warning("%s: Skipping bogus ASN %s" % (name, asn))
+ continue
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO autnum_overrides(
- number,
- source,
- is_drop
- ) VALUES (%s, %s, %s)
- ON CONFLICT (number) DO UPDATE SET is_drop = True""",
- "%s" % asn,
- name,
- True
- )
+ # Write to database
+ self.db.execute("""
+ INSERT INTO
+ autnum_feeds
+ (
+ number,
+ source,
+ is_drop
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % asn, name, True,
+ )
@staticmethod
def _parse_bool(block, key):
# Default to None
return None
- @property
- def countries(self):
- # Fetch all valid country codes to check parsed networks aganist
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
-
- # Return all countries
- return [row.country_code for row in rows]
-
def handle_import_countries(self, ns):
with self.db.transaction():
# Drop all data that we have