# #
# libloc - A library to determine the location of someone on the Internet #
# #
-# Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
+# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
# #
# This library is free software; you can redistribute it and/or #
# modify it under the terms of the GNU Lesser General Public #
###############################################################################
import argparse
+import concurrent.futures
+import csv
+import http.client
import ipaddress
import json
import logging
import re
import socket
import sys
-import telnetlib
+import urllib.error
+import urllib.parse
+import urllib.request
# Load our location module
import location
(131072, 4199999999),
)
+# Configure the CSV parser for ARIN
+csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
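+# (the source file wraps every field in double quotes)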
class CLI(object):
def parse_cli(self):
update_announcements.add_argument("server", nargs=1,
help=_("Route Server to connect to"), metavar=_("SERVER"))
+ # Update geofeeds
+ update_geofeeds = subparsers.add_parser("update-geofeeds",
+ help=_("Update Geofeeds"))
+ update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
+
+ # Update feeds
+ update_feeds = subparsers.add_parser("update-feeds",
+ help=_("Update Feeds"))
+ update_feeds.set_defaults(func=self.handle_update_feeds)
+
# Update overrides
update_overrides = subparsers.add_parser("update-overrides",
help=_("Update overrides"),
CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
+ -- geofeeds
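+ -- geofeeds tracks the fetch state of every feed URL,
+ -- geofeed_networks stores the parsed feed content, and
+ -- network_geofeeds maps networks to the feed URL they announce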
+ CREATE TABLE IF NOT EXISTS geofeeds(
+ id serial primary key,
+ url text,
+ status integer default null,
+ updated_at timestamp without time zone default null
+ );
+ ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
+ CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
+ ON geofeeds(url);
+ CREATE TABLE IF NOT EXISTS geofeed_networks(
+ geofeed_id integer references geofeeds(id) on delete cascade,
+ network inet,
+ country text,
+ region text,
+ city text
+ );
+ CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
+ ON geofeed_networks(geofeed_id);
+ CREATE INDEX IF NOT EXISTS geofeed_networks_search
+ ON geofeed_networks USING GIST(network inet_ops);
+ CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
+ CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
+ ON network_geofeeds(network);
+ CREATE INDEX IF NOT EXISTS network_geofeeds_search
+ ON network_geofeeds USING GIST(network inet_ops);
+ CREATE INDEX IF NOT EXISTS network_geofeeds_url
+ ON network_geofeeds(url);
+
+ -- feeds
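+ -- autnum_feeds/network_feeds store third-party feed data keyed by
+ -- source so that each feed can be replaced independently on update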
+ CREATE TABLE IF NOT EXISTS autnum_feeds(
+ number bigint NOT NULL,
+ source text NOT NULL,
+ name text,
+ country text,
+ is_anonymous_proxy boolean,
+ is_satellite_provider boolean,
+ is_anycast boolean,
+ is_drop boolean
+ );
+ CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
+ ON autnum_feeds(number, source);
+
+ CREATE TABLE IF NOT EXISTS network_feeds(
+ network inet NOT NULL,
+ source text NOT NULL,
+ country text,
+ is_anonymous_proxy boolean,
+ is_satellite_provider boolean,
+ is_anycast boolean,
+ is_drop boolean
+ );
+ CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
+ ON network_feeds(network, source);
+ CREATE INDEX IF NOT EXISTS network_feeds_search
+ ON network_feeds USING GIST(network inet_ops);
+
-- overrides
CREATE TABLE IF NOT EXISTS autnum_overrides(
number bigint NOT NULL,
return db
+ def fetch_countries(self):
+ """
+ Returns a list of all country codes stored in the database
+ """
+ # Fetch all valid country codes to check parsed networks against...
+ countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
+
+ return [country.country_code for country in countries]
+
def handle_write(self, ns):
"""
Compiles a database in libloc format out of what is in the database
SELECT
autnums.number AS number,
COALESCE(
- (SELECT overrides.name FROM autnum_overrides overrides
- WHERE overrides.number = autnums.number),
+ overrides.name,
autnums.name
) AS name
- FROM autnums
- WHERE name <> %s ORDER BY number
- """, "")
+ FROM
+ autnums
+ LEFT JOIN
+ autnum_overrides overrides ON autnums.number = overrides.number
+ ORDER BY
+ autnums.number
+ """)
for row in rows:
+ # Skip ASes without a name
+ if not row.name:
+ continue
+
a = writer.add_as(row.number)
a.name = row.name
UNION
SELECT network FROM networks
UNION
+ SELECT network FROM network_feeds
+ UNION
SELECT network FROM network_overrides
+ UNION
+ SELECT network FROM geofeed_networks
),
ordered_networks AS (
-- Country
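+ -- Pick the most specific country in order of precedence:
+ -- network overrides, autnum overrides, network feeds,
+ -- autnum feeds, geofeeds, then the RIR data itself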
COALESCE(
(
- SELECT country FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ country
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ country
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
+ ),
+ (
+ SELECT
+ country
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ country
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
),
(
- SELECT country FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ geofeed_networks.country AS country
+ FROM
+ network_geofeeds
+
+ -- Join the data from the geofeeds
+ LEFT JOIN
+ geofeeds ON network_geofeeds.url = geofeeds.url
+ LEFT JOIN
+ geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id
+
+ -- Check whether we have a geofeed for this network
+ WHERE
+ networks.network <<= network_geofeeds.network
+ AND
+ networks.network <<= geofeed_networks.network
+
+ -- Filter for the best result
+ ORDER BY
+ masklen(geofeed_networks.network) DESC
+ LIMIT 1
),
networks.country
) AS country,
-- Flags
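+ -- Flags use the most specific source available: network overrides,
+ -- then network feeds, then autnum feeds, then autnum overrides,
+ -- defaulting to FALSE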
COALESCE(
(
- SELECT is_anonymous_proxy FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_anonymous_proxy
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
),
(
- SELECT is_anonymous_proxy FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_anonymous_proxy
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anonymous_proxy
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anonymous_proxy
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_anonymous_proxy,
COALESCE(
(
- SELECT is_satellite_provider FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_satellite_provider
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
),
(
- SELECT is_satellite_provider FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_satellite_provider
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_satellite_provider
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_satellite_provider
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_satellite_provider,
COALESCE(
(
- SELECT is_anycast FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_anycast
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anycast
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_anycast
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
),
(
- SELECT is_anycast FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_anycast
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_anycast,
COALESCE(
(
- SELECT is_drop FROM network_overrides overrides
- WHERE networks.network <<= overrides.network
- ORDER BY masklen(overrides.network) DESC
- LIMIT 1
+ SELECT
+ is_drop
+ FROM
+ network_overrides overrides
+ WHERE
+ networks.network <<= overrides.network
+ ORDER BY
+ masklen(overrides.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_drop
+ FROM
+ network_feeds feeds
+ WHERE
+ networks.network <<= feeds.network
+ ORDER BY
+ masklen(feeds.network) DESC
+ LIMIT 1
+ ),
+ (
+ SELECT
+ is_drop
+ FROM
+ autnum_feeds feeds
+ WHERE
+ networks.autnum = feeds.number
+ ORDER BY
+ source
+ LIMIT 1
),
(
- SELECT is_drop FROM autnum_overrides overrides
- WHERE networks.autnum = overrides.number
+ SELECT
+ is_drop
+ FROM
+ autnum_overrides overrides
+ WHERE
+ networks.autnum = overrides.number
),
FALSE
) AS is_drop
def handle_update_whois(self, ns):
downloader = location.importer.Downloader()
- # Download all sources
- with self.db.transaction():
- # Create some temporary tables to store parsed data
- self.db.execute("""
- CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
- ON COMMIT DROP;
- CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
-
- CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
- ON COMMIT DROP;
- CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
-
- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
- ON COMMIT DROP;
- CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
- CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
- """)
-
- # Remove all previously imported content
- self.db.execute("""
- TRUNCATE TABLE networks;
- """)
-
- # Fetch all valid country codes to check parsed networks aganist...
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
- validcountries = []
-
- for row in rows:
- validcountries.append(row.country_code)
+ # Did we run successfully?
+ error = False
- for source_key in location.importer.WHOIS_SOURCES:
- for single_url in location.importer.WHOIS_SOURCES[source_key]:
- with downloader.request(single_url, return_blocks=True) as f:
- for block in f:
- self._parse_block(block, source_key, validcountries)
+ # Fetch all valid country codes to check parsed networks against
+ validcountries = self.fetch_countries()
- # Process all parsed networks from every RIR we happen to have access to,
- # insert the largest network chunks into the networks table immediately...
- families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
+ # Iterate over all potential sources
+ for source in sorted(location.importer.SOURCES):
+ with self.db.transaction():
+ # Create some temporary tables to store parsed data
+ self.db.execute("""
+ CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
+ organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+ CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
+
+ CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
+ name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
+ CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
+
+ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
+ original_countries text[] NOT NULL, source text NOT NULL)
+ ON COMMIT DROP;
+ CREATE INDEX _rirdata_search ON _rirdata
+ USING BTREE(family(network), masklen(network));
+ CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
+ """)
+
+ # Remove all previously imported content
+ self.db.execute("DELETE FROM autnums WHERE source = %s", source)
+ self.db.execute("DELETE FROM networks WHERE source = %s", source)
- for family in (row.family for row in families):
- smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
+ try:
+ # Fetch WHOIS sources
+ for url in location.importer.WHOIS_SOURCES.get(source, []):
+ for block in downloader.request_blocks(url):
+ self._parse_block(block, source, validcountries)
+
+ # Fetch extended sources
+ for url in location.importer.EXTENDED_SOURCES.get(source, []):
+ for line in downloader.request_lines(url):
+ self._parse_line(line, source, validcountries)
+ except urllib.error.URLError as e:
+ log.error("Could not retrieve data from %s: %s" % (source, e))
+ error = True
+
+ # Continue with the next source
+ continue
- self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
- SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
+ # Process all parsed networks from every RIR we happen to have access to,
+ # insert the largest network chunks into the networks table immediately...
+ families = self.db.query("""
+ SELECT DISTINCT
+ family(network) AS family
+ FROM
+ _rirdata
+ ORDER BY
+ family(network)
+ """,
+ )
- # ... determine any other prefixes for this network family, ...
- prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
- WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
+ for family in (row.family for row in families):
+ # Fetch the smallest mask length in our data set
+ smallest = self.db.get("""
+ SELECT
+ MIN(
+ masklen(network)
+ ) AS prefix
+ FROM
+ _rirdata
+ WHERE
+ family(network) = %s
+ """, family,
+ )
- # ... and insert networks with this prefix in case they provide additional
- # information (i. e. subnet of a larger chunk with a different country)
- for prefix in (row.prefix for row in prefixes):
+ # Copy all networks
self.db.execute("""
- WITH candidates AS (
- SELECT
- _rirdata.network,
- _rirdata.country,
- _rirdata.original_countries,
- _rirdata.source
- FROM
- _rirdata
- WHERE
- family(_rirdata.network) = %s
- AND
- masklen(_rirdata.network) = %s
- ),
- filtered AS (
- SELECT
- DISTINCT ON (c.network)
- c.network,
- c.country,
- c.original_countries,
- c.source,
- masklen(networks.network),
- networks.country AS parent_country
- FROM
- candidates c
- LEFT JOIN
- networks
- ON
- c.network << networks.network
- ORDER BY
- c.network,
- masklen(networks.network) DESC NULLS LAST
- )
INSERT INTO
- networks(network, country, original_countries, source)
+ networks
+ (
+ network,
+ country,
+ original_countries,
+ source
+ )
SELECT
network,
country,
original_countries,
source
FROM
- filtered
+ _rirdata
WHERE
- parent_country IS NULL
- OR
- country <> parent_country
- ON CONFLICT DO NOTHING""",
- family, prefix,
+ masklen(network) = %s
+ AND
+ family(network) = %s
+ ON CONFLICT DO NOTHING
+ """,
+ smallest.prefix,
+ family,
)
- self.db.execute("""
- INSERT INTO autnums(number, name, source)
- SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
- JOIN _organizations ON _autnums.organization = _organizations.handle
- ON CONFLICT (number) DO UPDATE SET name = excluded.name;
- """)
+ # ... determine any other prefixes for this network family, ...
+ prefixes = self.db.query("""
+ SELECT
+ DISTINCT masklen(network) AS prefix
+ FROM
+ _rirdata
+ WHERE
+ family(network) = %s
+ ORDER BY
+ masklen(network) ASC
+ OFFSET 1
+ """, family,
+ )
+
+ # ... and insert networks with this prefix in case they provide additional
+ # information (i. e. subnet of a larger chunk with a different country)
+ for prefix in (row.prefix for row in prefixes):
+ self.db.execute("""
+ WITH candidates AS (
+ SELECT
+ _rirdata.network,
+ _rirdata.country,
+ _rirdata.original_countries,
+ _rirdata.source
+ FROM
+ _rirdata
+ WHERE
+ family(_rirdata.network) = %s
+ AND
+ masklen(_rirdata.network) = %s
+ ),
+ filtered AS (
+ SELECT
+ DISTINCT ON (c.network)
+ c.network,
+ c.country,
+ c.original_countries,
+ c.source,
+ masklen(networks.network),
+ networks.country AS parent_country
+ FROM
+ candidates c
+ LEFT JOIN
+ networks
+ ON
+ c.network << networks.network
+ ORDER BY
+ c.network,
+ masklen(networks.network) DESC NULLS LAST
+ )
+ INSERT INTO
+ networks(network, country, original_countries, source)
+ SELECT
+ network,
+ country,
+ original_countries,
+ source
+ FROM
+ filtered
+ WHERE
+ parent_country IS NULL
+ OR
+ country <> parent_country
+ ON CONFLICT DO NOTHING
+ """, family, prefix,
+ )
- # Download all extended sources
- for source_key in location.importer.EXTENDED_SOURCES:
- for single_url in location.importer.EXTENDED_SOURCES[source_key]:
- with self.db.transaction():
- # Download data
- with downloader.request(single_url) as f:
- for line in f:
- self._parse_line(line, source_key, validcountries)
+ self.db.execute("""
+ INSERT INTO
+ autnums
+ (
+ number,
+ name,
+ source
+ )
+ SELECT
+ _autnums.number,
+ _organizations.name,
+ _organizations.source
+ FROM
+ _autnums
+ JOIN
+ _organizations ON _autnums.organization = _organizations.handle
+ ON CONFLICT
+ (
+ number
+ )
+ DO UPDATE
+ SET name = excluded.name
+ """,
+ )
# Download and import (technical) AS names from ARIN
- self._import_as_names_from_arin()
+ with self.db.transaction():
+ self._import_as_names_from_arin(downloader)
+
+ # Return a non-zero exit code for errors
+ return 1 if error else 0
def _check_parsed_network(self, network):
"""
inetnum[key].append(val)
+ # Parse the geofeed attribute
+ elif key == "geofeed":
+ inetnum["geofeed"] = val
+
+ # Parse geofeed when used as a remark
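+ # (e.g. a remark of the form "Geofeed https://example.com/geofeed.csv")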
+ elif key == "remarks":
+ m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
+ if m:
+ inetnum["geofeed"] = m.group(1)
+
# Skip empty objects
if not inetnum or not "country" in inetnum:
return
# them into the database, if _check_parsed_network() succeeded
for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
if self._check_parsed_network(single_network):
-
# Skip objects with unknown country codes if they are valid to avoid log spam...
if validcountries and invalidcountries:
log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
"%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
)
+ # Update any geofeed information
+ geofeed = inetnum.get("geofeed", None)
+ if geofeed:
+ self._parse_geofeed(geofeed, single_network)
+
+ # Delete any previous geofeeds
+ else:
+ self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
+ "%s" % single_network)
+
+ def _parse_geofeed(self, url, single_network):
+ # Parse the URL
+ url = urllib.parse.urlparse(url)
+
+ # Make sure that this is a HTTPS URL
+ if not url.scheme == "https":
+ log.debug("Geofeed URL is not using HTTPS: %s" % geofeed)
+ return
+
+ # Put the URL back together normalized
+ url = url.geturl()
+
+ # Store/update any geofeeds
+ self.db.execute("""
+ INSERT INTO
+ network_geofeeds(
+ network,
+ url
+ )
+ VALUES(
+ %s, %s
+ )
+ ON CONFLICT (network) DO
+ UPDATE SET url = excluded.url""",
+ "%s" % single_network, url,
+ )
+
def _parse_org_block(self, block, source_key):
org = {}
for line in block:
"%s" % network, country, [country], source_key,
)
- def _import_as_names_from_arin(self):
- downloader = location.importer.Downloader()
+ def _import_as_names_from_arin(self, downloader):
+ # Delete all previously imported content
+ self.db.execute("DELETE FROM autnums WHERE source = %s", "ARIN")
- # XXX: Download AS names file from ARIN (note that these names appear to be quite
- # technical, not intended for human consumption, as description fields in
- # organisation handles for other RIRs are - however, this is what we have got,
- # and in some cases, it might be still better than nothing)
- with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
- for line in f:
- # Convert binary line to string...
- line = str(line)
-
- # ... valid lines start with a space, followed by the number of the Autonomous System ...
- if not line.startswith(" "):
- continue
+ # Try to retrieve the feed from ftp.arin.net
+ feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
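+
+ # The CSV is expected to carry the columns "Start AS Number",
+ # "End AS Number", "Org Name" and "Status"; anything else is ignored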
- # Split line and check if there is a valid ASN in it...
- asn, name = line.split()[0:2]
+ # Walk through the file
+ for line in csv.DictReader(feed, dialect="arin"):
+ log.debug("Processing object: %s" % line)
- try:
- asn = int(asn)
- except ValueError:
- log.debug("Skipping ARIN AS names line not containing an integer for ASN")
- continue
+ # Fetch status
+ status = line.get("Status")
- # Filter invalid ASNs...
- if not self._check_parsed_asn(asn):
- continue
+ # We are only interested in anything managed by ARIN
+ if not status == "Full Registry Services":
+ continue
- # Skip any AS name that appears to be a placeholder for a different RIR or entity...
- if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
- continue
+ # Fetch organization name
+ name = line.get("Org Name")
+
+ # Extract ASNs
+ first_asn = line.get("Start AS Number")
+ last_asn = line.get("End AS Number")
+
+ # Cast to a number
+ try:
+ first_asn = int(first_asn)
+ except (TypeError, ValueError):
+ log.warning("Could not parse ASN '%s'" % first_asn)
+ continue
+
+ try:
+ last_asn = int(last_asn)
+ except (TypeError, ValueError):
+ log.warning("Could not parse ASN '%s'" % last_asn)
+ continue
- # Bail out in case the AS name contains anything we do not expect here...
- if re.search(r"[^a-zA-Z0-9-_]", name):
- log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
- (asn, name))
+ # Check if the range is valid
+ if last_asn < first_asn:
+ log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
+
+ # Insert everything into the database
+ for asn in range(first_asn, last_asn + 1):
+ if not self._check_parsed_asn(asn):
+ log.warning("Skipping invalid ASN %s" % asn)
+ continue
- # Things look good here, run INSERT statement and skip this one if we already have
- # a (better?) name for this Autonomous System...
self.db.execute("""
- INSERT INTO autnums(
+ INSERT INTO
+ autnums
+ (
number,
name,
source
- ) VALUES (%s, %s, %s)
- ON CONFLICT (number) DO NOTHING""",
- asn,
- name,
- "ARIN",
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )
+ ON CONFLICT
+ (
+ number
+ )
+ DO NOTHING
+ """, asn, name, "ARIN",
)
def handle_update_announcements(self, ns):
with self.db.transaction():
if server.startswith("/"):
self._handle_update_announcements_from_bird(server)
- else:
- self._handle_update_announcements_from_telnet(server)
# Purge anything we never want here
self.db.execute("""
# We don't need to process any more
break
- def _handle_update_announcements_from_telnet(self, server):
- # Pre-compile regular expression for routes
- route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
-
- with telnetlib.Telnet(server) as t:
- # Enable debug mode
- #if ns.debug:
- # t.set_debuglevel(10)
-
- # Wait for console greeting
- greeting = t.read_until(b"> ", timeout=30)
- if not greeting:
- log.error("Could not get a console prompt")
- return 1
-
- # Disable pagination
- t.write(b"terminal length 0\n")
-
- # Wait for the prompt to return
- t.read_until(b"> ")
-
- # Fetch the routing tables
- for protocol in ("ipv6", "ipv4"):
- log.info("Requesting %s routing table" % protocol)
-
- # Request the full unicast routing table
- t.write(b"show bgp %s unicast\n" % protocol.encode())
-
- # Read entire header which ends with "Path"
- t.read_until(b"Path\r\n")
-
- while True:
- # Try reading a full entry
- # Those might be broken across multiple lines but ends with i
- line = t.read_until(b"i\r\n", timeout=5)
- if not line:
- break
-
- # Show line for debugging
- #log.debug(repr(line))
-
- # Try finding a route in here
- m = route.match(line)
- if m:
- network, autnum = m.groups()
-
- # Convert network to string
- network = network.decode()
-
- # Append /24 for IPv4 addresses
- if not "/" in network and not ":" in network:
- network = "%s/24" % network
-
- # Convert AS number to integer
- autnum = int(autnum)
-
- log.info("Found announcement for %s by %s" % (network, autnum))
-
- self.db.execute("INSERT INTO announcements(network, autnum) \
- VALUES(%s, %s) ON CONFLICT (network) DO \
- UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
- network, autnum,
- )
-
- log.info("Finished reading the %s routing table" % protocol)
-
def _bird_cmd(self, socket_path, command):
# Connect to the socket
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
# Otherwise return the line
yield line
- def handle_update_overrides(self, ns):
+ def handle_update_geofeeds(self, ns):
+ # Sync geofeeds
+ with self.db.transaction():
+ # Delete all geofeeds which are no longer linked
+ self.db.execute("""
+ DELETE FROM
+ geofeeds
+ WHERE
+ NOT EXISTS (
+ SELECT
+ 1
+ FROM
+ network_geofeeds
+ WHERE
+ geofeeds.url = network_geofeeds.url
+ )""",
+ )
+
+ # Copy all geofeeds
+ self.db.execute("""
+ INSERT INTO
+ geofeeds(
+ url
+ )
+ SELECT
+ url
+ FROM
+ network_geofeeds
+ ON CONFLICT (url)
+ DO NOTHING
+ """,
+ )
+
+ # Fetch all Geofeeds that require an update
+ geofeeds = self.db.query("""
+ SELECT
+ id,
+ url
+ FROM
+ geofeeds
+ WHERE
+ updated_at IS NULL
+ OR
+ updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
+ ORDER BY
+ id
+ """)
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+ results = executor.map(self._fetch_geofeed, geofeeds)
+
+ # Fetch all results to raise any exceptions
+ for result in results:
+ pass
+
+ # Delete data from any feeds that did not update in the last two weeks
with self.db.transaction():
- # Drop all data that we have
self.db.execute("""
- TRUNCATE TABLE autnum_overrides;
- TRUNCATE TABLE network_overrides;
+ DELETE FROM
+ geofeed_networks
+ WHERE
+ geofeed_networks.geofeed_id IN (
+ SELECT
+ geofeeds.id
+ FROM
+ geofeeds
+ WHERE
+ updated_at IS NULL
+ OR
+ updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
+ )
""")
- # Update overrides for various cloud providers big enough to publish their own IP
- # network allocation lists in a machine-readable format...
- self._update_overrides_for_aws()
+ def _fetch_geofeed(self, geofeed):
+ log.debug("Fetching Geofeed %s" % geofeed.url)
+
+ with self.db.transaction():
+ # Open the URL
+ try:
+ req = urllib.request.Request(geofeed.url, headers={
+ "User-Agent" : "location/%s" % location.__version__,
+
+ # We expect some plain text file in CSV format
+ "Accept" : "text/csv, text/plain",
+ })
+
+ # XXX set proxy
+
+ # Send the request
+ with urllib.request.urlopen(req, timeout=10) as f:
+ # Remove any previous data
+ self.db.execute("DELETE FROM geofeed_networks \
+ WHERE geofeed_id = %s", geofeed.id)
+
+ lineno = 0
+
+ # Read the output line by line
+ for line in f:
+ lineno += 1
+
+ try:
+ line = line.decode()
+
+ # Ignore any lines we cannot decode
+ except UnicodeDecodeError:
+ log.debug("Could not decode line %s in %s" \
+ % (lineno, geofeed.url))
+ continue
+
+ # Strip any newline
+ line = line.rstrip()
+
+ # Skip empty lines
+ if not line:
+ continue
+
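+ # Each line should follow the RFC 8805 format:
+ # network,country,region,city[,postal code]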
+ # Split the line into its fields
+ fields = line.split(",", 5)
+
+ # Check if we have enough fields
+ if len(fields) < 4:
+ log.debug("Not enough fields in line: %s" % line)
+ continue
+
+ # Fetch all fields
+ network, country, region, city = fields[:4]
+
+ # Try to parse the network
+ try:
+ network = ipaddress.ip_network(network, strict=False)
+ except ValueError:
+ log.debug("Could not parse network: %s" % network)
+ continue
+
+ # Strip any excess whitespace from country codes
+ country = country.strip()
+
+ # Make the country code uppercase
+ country = country.upper()
+
+ # Check the country code
+ if not country:
+ log.debug("Empty country code in Geofeed %s line %s" \
+ % (geofeed.url, lineno))
+ continue
+
+ elif not location.country_code_is_valid(country):
+ log.debug("Invalid country code in Geofeed %s:%s: %s" \
+ % (geofeed.url, lineno, country))
+ continue
+
+ # Write this into the database
+ self.db.execute("""
+ INSERT INTO
+ geofeed_networks (
+ geofeed_id,
+ network,
+ country,
+ region,
+ city
+ )
+ VALUES (%s, %s, %s, %s, %s)""",
+ geofeed.id,
+ "%s" % network,
+ country,
+ region,
+ city,
+ )
+
+ # Catch any HTTP errors
+ except urllib.error.HTTPError as e:
+ self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
+ WHERE id = %s", e.code, "%s" % e, geofeed.id)
+
+ # Remove any previous data when the feed has been deleted
+ if e.code == 404:
+ self.db.execute("DELETE FROM geofeed_networks \
+ WHERE geofeed_id = %s", geofeed.id)
- # Update overrides for Spamhaus DROP feeds...
- self._update_overrides_for_spamhaus_drop()
+ # Catch any other errors and connection timeouts
+ except (http.client.InvalidURL, urllib.error.URLError, TimeoutError) as e:
+ log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))
+
+ self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
+ WHERE id = %s", 599, "%s" % e, geofeed.id)
+
+ # Mark the geofeed as updated
+ else:
+ self.db.execute("""
+ UPDATE
+ geofeeds
+ SET
+ updated_at = CURRENT_TIMESTAMP,
+ status = NULL,
+ error = NULL
+ WHERE
+ id = %s""",
+ geofeed.id,
+ )
+
+ def handle_update_overrides(self, ns):
+ with self.db.transaction():
+ # Only drop manually created overrides, which are about to be re-imported,
+ # and preserve the rest; those are deleted by the corresponding update functions.
+ self.db.execute("""
+ DELETE FROM autnum_overrides WHERE source = 'manual';
+ DELETE FROM network_overrides WHERE source = 'manual';
+ """)
for file in ns.files:
log.info("Reading %s..." % file)
else:
log.warning("Unsupported type: %s" % type)
- def _update_overrides_for_aws(self):
- # Download Amazon AWS IP allocation file to create overrides...
+ def handle_update_feeds(self, ns):
+ """
+ Update any third-party feeds
+ """
+ success = True
+
+ # Create a downloader
downloader = location.importer.Downloader()
- try:
- with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
- aws_ip_dump = json.load(f.body)
- except Exception as e:
- log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
- return
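+ # Each feed is a tuple of the source name, the callback that
+ # parses it, the URL to fetch, and any optional extra arguments
+ # that are passed through to the callback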
+ feeds = (
+ # AWS IP Ranges
+ ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
+ )
+
+ # Walk through all feeds
+ for name, callback, url, *args in feeds:
+ try:
+ self._process_feed(downloader, name, callback, url, *args)
+
+ # Log an error but continue if an exception occurs
+ except Exception as e:
+ log.error("Error processing feed '%s': %s" % (name, e))
+ success = False
- # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
+ # Spamhaus
+ self._update_feed_for_spamhaus_drop()
+
+ # Return status
+ return 0 if success else 1
+
+ def _process_feed(self, downloader, name, callback, url, *args):
+ """
+ Processes one feed
+ """
+ # Open the URL
+ f = downloader.retrieve(url)
+
+ with self.db.transaction():
+ # Drop any previous content
+ self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
+ self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
+
+ # Call the callback to process the feed
+ return callback(name, f, *args)
+
+ def _import_aws_ip_ranges(self, name, f):
+ # Parse the feed
+ aws_ip_dump = json.load(f)
+
+ # Set up a dictionary for mapping a region name to a country. Unfortunately,
# there seems to be no machine-readable version available of this other than
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
# was helpful here as well.
aws_region_country_map = {
- "af-south-1": "ZA",
- "ap-east-1": "HK",
- "ap-south-1": "IN",
- "ap-south-2": "IN",
- "ap-northeast-3": "JP",
- "ap-northeast-2": "KR",
- "ap-southeast-1": "SG",
- "ap-southeast-2": "AU",
- "ap-southeast-3": "MY",
- "ap-southeast-4": "AU",
- "ap-northeast-1": "JP",
- "ca-central-1": "CA",
- "eu-central-1": "DE",
- "eu-central-2": "CH",
- "eu-west-1": "IE",
- "eu-west-2": "GB",
- "eu-south-1": "IT",
- "eu-south-2": "ES",
- "eu-west-3": "FR",
- "eu-north-1": "SE",
- "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
- "me-central-1": "AE",
- "me-south-1": "BH",
- "sa-east-1": "BR"
- }
-
- # Fetch all valid country codes to check parsed networks aganist...
- rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
- validcountries = []
-
- for row in rows:
- validcountries.append(row.country_code)
-
- with self.db.transaction():
- for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
- try:
- network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
- except ValueError:
- log.warning("Unable to parse line: %s" % snetwork)
- continue
+ # Africa
+ "af-south-1" : "ZA",
+
+ # Asia
+ "il-central-1" : "IL", # Tel Aviv
+
+ # Asia/Pacific
+ "ap-northeast-1" : "JP",
+ "ap-northeast-2" : "KR",
+ "ap-northeast-3" : "JP",
+ "ap-east-1" : "HK",
+ "ap-south-1" : "IN",
+ "ap-south-2" : "IN",
+ "ap-southeast-1" : "SG",
+ "ap-southeast-2" : "AU",
+ "ap-southeast-3" : "MY",
+ "ap-southeast-4" : "AU",
+ "ap-southeast-5" : "NZ", # Auckland, NZ
+ "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
+
+ # Canada
+ "ca-central-1" : "CA",
+ "ca-west-1" : "CA",
+
+ # Europe
+ "eu-central-1" : "DE",
+ "eu-central-2" : "CH",
+ "eu-north-1" : "SE",
+ "eu-west-1" : "IE",
+ "eu-west-2" : "GB",
+ "eu-west-3" : "FR",
+ "eu-south-1" : "IT",
+ "eu-south-2" : "ES",
+
+ # Middle East
+ "me-central-1" : "AE",
+ "me-south-1" : "BH",
+
+ # South America
+ "sa-east-1" : "BR",
+
+ # Undocumented, likely located in Berlin rather than Frankfurt
+ "eusc-de-east-1" : "DE",
+ }
+
+ # Fetch all countries that we know of
+ countries = self.fetch_countries()
+
+ for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
+ try:
+ network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
+ except ValueError:
+ log.warning("Unable to parse line: %s" % snetwork)
+ continue
- # Sanitize parsed networks...
- if not self._check_parsed_network(network):
- continue
+ # Sanitize parsed networks...
+ if not self._check_parsed_network(network):
+ continue
- # Determine region of this network...
- region = snetwork["region"]
- cc = None
- is_anycast = False
-
- # Any region name starting with "us-" will get "US" country code assigned straight away...
- if region.startswith("us-"):
- cc = "US"
- elif region.startswith("cn-"):
- # ... same goes for China ...
- cc = "CN"
- elif region == "GLOBAL":
- # ... funny region name for anycast-like networks ...
- is_anycast = True
- elif region in aws_region_country_map:
- # ... assign looked up country code otherwise ...
- cc = aws_region_country_map[region]
- else:
- # ... and bail out if we are missing something here
- log.warning("Unable to determine country code for line: %s" % snetwork)
- continue
+ # Determine region of this network...
+ region = snetwork["region"]
+ cc = None
+ is_anycast = False
+
+ # Any region name starting with "us-" will get "US" country code assigned straight away...
+ if region.startswith("us-"):
+ cc = "US"
+ elif region.startswith("cn-"):
+ # ... same goes for China ...
+ cc = "CN"
+ elif region == "GLOBAL":
+ # ... funny region name for anycast-like networks ...
+ is_anycast = True
+ elif region in aws_region_country_map:
+ # ... assign looked up country code otherwise ...
+ cc = aws_region_country_map[region]
+ else:
+ # ... and bail out if we are missing something here
+ log.warning("Unable to determine country code for line: %s" % snetwork)
+ continue
- # Skip networks with unknown country codes
- if not is_anycast and validcountries and cc not in validcountries:
- log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
- (cc, network))
- return
+ # Skip networks with unknown country codes
+ if not is_anycast and countries and cc not in countries:
+ log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
+ (cc, network))
+ continue
- # Conduct SQL statement...
- self.db.execute("""
- INSERT INTO network_overrides(
- network,
- country,
- source,
- is_anonymous_proxy,
- is_satellite_provider,
- is_anycast
- ) VALUES (%s, %s, %s, %s, %s, %s)
- ON CONFLICT (network) DO NOTHING""",
- "%s" % network,
- cc,
- "Amazon AWS IP feed",
- None,
- None,
- is_anycast,
+ # Conduct SQL statement...
+ self.db.execute("""
+ INSERT INTO
+ network_feeds
+ (
+ network,
+ source,
+ country,
+ is_anycast
)
+ VALUES
+ (
+ %s, %s, %s, %s
+ )
+ ON CONFLICT (network, source) DO NOTHING
+ """, "%s" % network, "Amazon AWS IP feed", cc, is_anycast,
+ )
-
- def _update_overrides_for_spamhaus_drop(self):
+ def _update_feed_for_spamhaus_drop(self):
downloader = location.importer.Downloader()
- ip_urls = [
- "https://www.spamhaus.org/drop/drop.txt",
- "https://www.spamhaus.org/drop/edrop.txt",
- "https://www.spamhaus.org/drop/dropv6.txt"
+ ip_lists = [
+ ("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
+ ("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
+ ("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
]
- asn_urls = [
- "https://www.spamhaus.org/drop/asndrop.txt"
+ asn_lists = [
+ ("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json")
]
- for url in ip_urls:
- try:
- with downloader.request(url, return_blocks=False) as f:
- fcontent = f.body.readlines()
- except Exception as e:
- log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
- return
+ for name, url in ip_lists:
+ # Fetch IP list from given URL
+ f = downloader.retrieve(url)
+
+ # Split into lines
+ fcontent = f.readlines()
- # Iterate through every line, filter comments and add remaining networks to
- # the override table in case they are valid...
with self.db.transaction():
- for sline in fcontent:
+ # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
+ # downloads.
+ if len(fcontent) > 10:
+ self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
+ else:
+ log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
+ continue
+ # Iterate through every line, filter comments and add remaining networks to
+ # the feed table in case they are valid...
+ for sline in fcontent:
# The response is assumed to be encoded in UTF-8...
sline = sline.decode("utf-8")
# Sanitize parsed networks...
if not self._check_parsed_network(network):
- log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
- (url, network))
+ log.warning("Skipping bogus network found in %s (%s): %s" % \
+ (name, url, network))
continue
# Conduct SQL statement...
self.db.execute("""
- INSERT INTO network_overrides(
+ INSERT INTO
+ network_feeds
+ (
network,
source,
is_drop
- ) VALUES (%s, %s, %s)
- ON CONFLICT (network) DO UPDATE SET is_drop = True""",
- "%s" % network,
- "Spamhaus DROP lists",
- True
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % network, name, True,
)
- for url in asn_urls:
- try:
- with downloader.request(url, return_blocks=False) as f:
- fcontent = f.body.readlines()
- except Exception as e:
- log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
- return
+ for name, url in asn_lists:
+ # Fetch URL
+ f = downloader.retrieve(url)
+
+ # Split into lines
+ fcontent = f.readlines()
- # Iterate through every line, filter comments and add remaining ASNs to
- # the override table in case they are valid...
with self.db.transaction():
- for sline in fcontent:
+ # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
+ # downloads.
+ if len(fcontent) > 10:
+ self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
+ else:
+ log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
+ continue
+ # Iterate through every line, filter comments and add remaining ASNs to
+ # the feed table in case they are valid...
+ for sline in fcontent:
# The response is assumed to be encoded in UTF-8...
sline = sline.decode("utf-8")
- # Comments start with a semicolon...
- if sline.startswith(";"):
+ # Load every line as a JSON object and try to obtain an ASN from it...
+ try:
+ lineobj = json.loads(sline)
+ except json.decoder.JSONDecodeError:
+ log.error("Unable to parse line as a JSON object: %s" % sline)
continue
- # Throw away anything after the first space...
- sline = sline.split()[0]
+ # Skip lines containing file metadata
+ if lineobj.get("type") == "metadata":
+ continue
- # ... strip the "AS" prefix from it ...
- sline = sline.strip("AS")
- # ... and convert it into an integer. Voila.
- asn = int(sline)
+ try:
+ asn = lineobj["asn"]
+ as_name = lineobj["asname"]
+ except KeyError:
+ log.warning("Unable to extract necessary information from line: %s" % sline)
+ continue
# Filter invalid ASNs...
if not self._check_parsed_asn(asn):
- log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
- (url, asn))
+ log.warning("Skipping bogus ASN found in %s (%s): %s" % \
+ (name, url, asn))
continue
# Conduct SQL statement...
self.db.execute("""
- INSERT INTO autnum_overrides(
+ INSERT INTO
+ autnum_feeds
+ (
number,
source,
is_drop
- ) VALUES (%s, %s, %s)
- ON CONFLICT (number) DO UPDATE SET is_drop = True""",
- "%s" % asn,
- "Spamhaus ASN-DROP list",
- True
+ )
+ VALUES
+ (
+ %s, %s, %s
+ )""", "%s" % asn, name, True,
)
@staticmethod