last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
+ CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
-- autnums
CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
-- networks
CREATE TABLE IF NOT EXISTS networks(network inet, country text);
CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
+ CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
-- overrides
);
CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
ON network_overrides(network);
+ CREATE INDEX IF NOT EXISTS network_overrides_search
+ ON network_overrides USING GIST(network inet_ops);
""")
return db
# Select all known networks
rows = self.db.query("""
- -- Get a (sorted) list of all known networks
- WITH known_networks AS (
- SELECT network FROM announcements
- UNION
- SELECT network FROM networks
- ORDER BY network
- )
-
-- Return a list of those networks enriched with all
-- other information that we store in the database
SELECT
- DISTINCT ON (known_networks.network)
- known_networks.network AS network,
- announcements.autnum AS autnum,
+ DISTINCT ON (network)
+ network,
+ autnum,
-- Country
COALESCE(
(
SELECT country FROM network_overrides overrides
- WHERE announcements.network <<= overrides.network
+ WHERE networks.network <<= overrides.network
ORDER BY masklen(overrides.network) DESC
LIMIT 1
),
(
SELECT country FROM autnum_overrides overrides
- WHERE announcements.autnum = overrides.number
+ WHERE networks.autnum = overrides.number
),
networks.country
) AS country,
COALESCE(
(
SELECT is_anonymous_proxy FROM network_overrides overrides
- WHERE announcements.network <<= overrides.network
+ WHERE networks.network <<= overrides.network
ORDER BY masklen(overrides.network) DESC
LIMIT 1
),
(
SELECT is_anonymous_proxy FROM autnum_overrides overrides
- WHERE announcements.autnum = overrides.number
+ WHERE networks.autnum = overrides.number
),
FALSE
) AS is_anonymous_proxy,
COALESCE(
(
SELECT is_satellite_provider FROM network_overrides overrides
- WHERE announcements.network <<= overrides.network
+ WHERE networks.network <<= overrides.network
ORDER BY masklen(overrides.network) DESC
LIMIT 1
),
(
SELECT is_satellite_provider FROM autnum_overrides overrides
- WHERE announcements.autnum = overrides.number
+ WHERE networks.autnum = overrides.number
),
FALSE
) AS is_satellite_provider,
COALESCE(
(
SELECT is_anycast FROM network_overrides overrides
- WHERE announcements.network <<= overrides.network
+ WHERE networks.network <<= overrides.network
ORDER BY masklen(overrides.network) DESC
LIMIT 1
),
(
SELECT is_anycast FROM autnum_overrides overrides
- WHERE announcements.autnum = overrides.number
+ WHERE networks.autnum = overrides.number
),
FALSE
- ) AS is_anycast,
-
- -- Must be part of returned values for ORDER BY clause
- masklen(announcements.network) AS sort_a,
- masklen(networks.network) AS sort_b
- FROM known_networks
- LEFT JOIN announcements ON known_networks.network <<= announcements.network
- LEFT JOIN networks ON known_networks.network <<= networks.network
- ORDER BY known_networks.network, sort_a DESC, sort_b DESC
+ ) AS is_anycast
+ FROM (
+ SELECT
+ known_networks.network AS network,
+ announcements.autnum AS autnum,
+ networks.country AS country,
+
+ -- Must be part of returned values for ORDER BY clause
+ masklen(announcements.network) AS sort_a,
+ masklen(networks.network) AS sort_b
+ FROM (
+ SELECT network FROM announcements
+ UNION ALL
+ SELECT network FROM networks
+ UNION ALL
+ SELECT network FROM network_overrides
+ ) known_networks
+ LEFT JOIN
+ announcements ON known_networks.network <<= announcements.network
+ LEFT JOIN
+ networks ON known_networks.network <<= networks.network
+ ORDER BY
+ known_networks.network,
+ sort_a DESC,
+ sort_b DESC
+ ) networks
""")
for row in rows:
CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL)
ON COMMIT DROP;
CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
+
+ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL)
+ ON COMMIT DROP;
+ CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
+ CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
+ """)
+
+ # Remove all previously imported content
+ self.db.execute("""
+ TRUNCATE TABLE networks;
""")
for source in location.importer.WHOIS_SOURCES:
for block in f:
self._parse_block(block)
+ # Process all parsed networks from every RIR we happen to have access to,
+ # insert the largest network chunks into the networks table immediately...
+ families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
+
+ for family in (row.family for row in families):
+ smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
+
+ self.db.execute("INSERT INTO networks(network, country) \
+ SELECT network, country FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
+
+ # ... determine any other prefixes for this network family, ...
+ prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
+ WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
+
+ # ... and insert networks with this prefix in case they provide additional
+ # information (i. e. subnet of a larger chunk with a different country)
+ for prefix in (row.prefix for row in prefixes):
+ self.db.execute("""
+ WITH candidates AS (
+ SELECT
+ _rirdata.network,
+ _rirdata.country
+ FROM
+ _rirdata
+ WHERE
+ family(_rirdata.network) = %s
+ AND
+ masklen(_rirdata.network) = %s
+ ),
+ filtered AS (
+ SELECT
+ DISTINCT ON (c.network)
+ c.network,
+ c.country,
+ masklen(networks.network),
+ networks.country AS parent_country
+ FROM
+ candidates c
+ LEFT JOIN
+ networks
+ ON
+ c.network << networks.network
+ ORDER BY
+ c.network,
+ masklen(networks.network) DESC NULLS LAST
+ )
+ INSERT INTO
+ networks(network, country)
+ SELECT
+ network,
+ country
+ FROM
+ filtered
+ WHERE
+ parent_country IS NULL
+ OR
+ country <> parent_country
+ ON CONFLICT DO NOTHING""",
+ family, prefix,
+ )
+
self.db.execute("""
INSERT INTO autnums(number, name)
SELECT _autnums.number, _organizations.name FROM _autnums
for line in f:
self._parse_line(line)
+ def _check_parsed_network(self, network):
+ """
+ Assistive function to detect and subsequently sort out parsed
+ networks from RIR data (both Whois and so-called "extended sources"),
+ which are or have...
+
+ (a) not globally routable (RFC 1918 space, et al.)
+ (b) covering a too large chunk of the IP address space (prefix length
+ is < 7 for IPv4 networks, and < 10 for IPv6)
+ (c) "0.0.0.0" or "::" as a network address
+ (d) are too small for being publicly announced (we have decided not to
+ process them at the moment, as they significantly enlarge our
+ database without providing very helpful additional information)
+
+ This unfortunately is necessary due to brain-dead clutter across
+ various RIR databases, causing mismatches and eventually disruptions.
+
+ We will return False in case a network is not suitable for adding
+ it to our database, and True otherwise.
+ """
+
+ if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
+ return False
+
+ if not network.is_global:
+ log.debug("Skipping non-globally routable network: %s" % network)
+ return False
+
+ if network.version == 4:
+ if network.prefixlen < 7:
+ log.debug("Skipping too big IP chunk: %s" % network)
+ return False
+
+ if network.prefixlen > 24:
+ log.debug("Skipping network too small to be publicly announced: %s" % network)
+ return False
+
+ if str(network.network_address) == "0.0.0.0":
+ log.debug("Skipping network based on 0.0.0.0: %s" % network)
+ return False
+
+ elif network.version == 6:
+ if network.prefixlen < 10:
+ log.debug("Skipping too big IP chunk: %s" % network)
+ return False
+
+ if network.prefixlen > 48:
+ log.debug("Skipping network too small to be publicly announced: %s" % network)
+ return False
+
+ if str(network.network_address) == "::":
+ log.debug("Skipping network based on '::': %s" % network)
+ return False
+
+ else:
+ # This should not happen...
+ log.warning("Skipping network of unknown family, this should not happen: %s" % network)
+ return False
+
+ # In case we have made it here, the network is considered to
+ # be suitable for libloc consumption...
+ return True
+
def _parse_block(self, block):
# Get first line to find out what type of block this is
line = block[0]
autnum["asn"] = m.group(2)
elif key == "org":
- autnum[key] = val
+ autnum[key] = val.upper()
# Skip empty objects
if not autnum:
)
def _parse_inetnum_block(self, block):
- logging.debug("Parsing inetnum block:")
+ log.debug("Parsing inetnum block:")
inetnum = {}
for line in block:
- logging.debug(line)
+ log.debug(line)
# Split line
key, val = split_line(line)
+ # Filter any inetnum records which are only referring to IP space
+ # not managed by that specific RIR...
+ if key == "netname":
+ if re.match(r"(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK)", val.strip()):
+ log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
+ return
+
if key == "inetnum":
start_address, delim, end_address = val.partition("-")
start_address = ipaddress.ip_address(start_address)
end_address = ipaddress.ip_address(end_address)
except ValueError:
- logging.warning("Could not parse line: %s" % line)
+ log.warning("Could not parse line: %s" % line)
return
# Set prefix to default
inetnum[key] = val
elif key == "country":
- if val == "UNITED STATES":
- val = "US"
-
inetnum[key] = val.upper()
# Skip empty objects
- if not inetnum:
+ if not inetnum or not "country" in inetnum:
+ return
+
+ # Skip objects with bogus country code 'ZZ'
+ if inetnum.get("country") == "ZZ":
+ log.warning("Skipping network with bogus country 'ZZ': %s" % \
+ (inetnum.get("inet6num") or inetnum.get("inetnum")))
return
network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
- # Bail out in case we have processed a non-public IP network
- if network.is_private:
- logging.warning("Skipping non-globally routable network: %s" % network)
+ if not self._check_parsed_network(network):
return
- self.db.execute("INSERT INTO networks(network, country) \
+ self.db.execute("INSERT INTO _rirdata(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
"%s" % network, inetnum.get("country"),
)
# Split line
key, val = split_line(line)
- if key in ("organisation", "org-name"):
+ if key == "organisation":
+ org[key] = val.upper()
+ elif key == "org-name":
org[key] = val
# Skip empty objects
log.warning("Invalid IP address: %s" % address)
return
+ if not self._check_parsed_network(network):
+ return
+
self.db.execute("INSERT INTO networks(network, country) \
VALUES(%s, %s) ON CONFLICT (network) DO \
UPDATE SET country = excluded.country",
-- DELETE link local address space
DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
- -- DELETE IPv6 to IPv4 (6to4) address space
+ -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
+ DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
-- DELETE multicast and reserved address space
DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
)
else:
- log.warning("Unsupport type: %s" % type)
+ log.warning("Unsupported type: %s" % type)
@staticmethod
def _parse_bool(block, key):