2 ###############################################################################
4 # libloc - A library to determine the location of someone on the Internet #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
18 ###############################################################################
30 # Load our location module
32 import location
.database
33 import location
.importer
34 from location
.i18n
import _
37 log
= logging
.getLogger("location.importer")
50 parser
= argparse
.ArgumentParser(
51 description
=_("Location Importer Command Line Interface"),
53 subparsers
= parser
.add_subparsers()
55 # Global configuration flags
56 parser
.add_argument("--debug", action
="store_true",
57 help=_("Enable debug output"))
58 parser
.add_argument("--quiet", action
="store_true",
59 help=_("Enable quiet mode"))
62 parser
.add_argument("--version", action
="version",
63 version
="%(prog)s @VERSION@")
66 parser
.add_argument("--database-host", required
=True,
67 help=_("Database Hostname"), metavar
=_("HOST"))
68 parser
.add_argument("--database-name", required
=True,
69 help=_("Database Name"), metavar
=_("NAME"))
70 parser
.add_argument("--database-username", required
=True,
71 help=_("Database Username"), metavar
=_("USERNAME"))
72 parser
.add_argument("--database-password", required
=True,
73 help=_("Database Password"), metavar
=_("PASSWORD"))
76 write
= subparsers
.add_parser("write", help=_("Write database to file"))
77 write
.set_defaults(func
=self
.handle_write
)
78 write
.add_argument("file", nargs
=1, help=_("Database File"))
79 write
.add_argument("--signing-key", nargs
="?", type=open, help=_("Signing Key"))
80 write
.add_argument("--backup-signing-key", nargs
="?", type=open, help=_("Backup Signing Key"))
81 write
.add_argument("--vendor", nargs
="?", help=_("Sets the vendor"))
82 write
.add_argument("--description", nargs
="?", help=_("Sets a description"))
83 write
.add_argument("--license", nargs
="?", help=_("Sets the license"))
84 write
.add_argument("--version", type=int, help=_("Database Format Version"))
87 update_whois
= subparsers
.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois
.set_defaults(func
=self
.handle_update_whois
)
90 # Update announcements
91 update_announcements
= subparsers
.add_parser("update-announcements",
92 help=_("Update BGP Annoucements"))
93 update_announcements
.set_defaults(func
=self
.handle_update_announcements
)
94 update_announcements
.add_argument("server", nargs
=1,
95 help=_("Route Server to connect to"), metavar
=_("SERVER"))
98 update_overrides
= subparsers
.add_parser("update-overrides",
99 help=_("Update overrides"),
101 update_overrides
.add_argument(
102 "files", nargs
="+", help=_("Files to import"),
104 update_overrides
.set_defaults(func
=self
.handle_update_overrides
)
107 import_countries
= subparsers
.add_parser("import-countries",
108 help=_("Import countries"),
110 import_countries
.add_argument("file", nargs
=1, type=argparse
.FileType("r"),
111 help=_("File to import"))
112 import_countries
.set_defaults(func
=self
.handle_import_countries
)
114 args
= parser
.parse_args()
118 location
.logger
.set_level(logging
.DEBUG
)
120 location
.logger
.set_level(logging
.WARNING
)
122 # Print usage if no action was given
123 if not "func" in args
:
130 # Parse command line arguments
131 args
= self
.parse_cli()
133 # Initialise database
134 self
.db
= self
._setup
_database
(args
)
137 ret
= args
.func(args
)
139 # Return with exit code
143 # Otherwise just exit
146 def _setup_database(self
, ns
):
148 Initialise the database
150 # Connect to database
151 db
= location
.database
.Connection(
152 host
=ns
.database_host
, database
=ns
.database_name
,
153 user
=ns
.database_username
, password
=ns
.database_password
,
156 with db
.transaction():
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
215 def handle_write(self
, ns
):
217 Compiles a database in libloc format out of what is in the database
220 writer
= location
.Writer(ns
.signing_key
, ns
.backup_signing_key
)
224 writer
.vendor
= ns
.vendor
227 writer
.description
= ns
.description
230 writer
.license
= ns
.license
232 # Add all Autonomous Systems
233 log
.info("Writing Autonomous Systems...")
235 # Select all ASes with a name
236 rows
= self
.db
.query("""
238 autnums.number AS number,
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
245 WHERE name <> %s ORDER BY number
249 a
= writer
.add_as(row
.number
)
253 log
.info("Writing networks...")
255 # Select all known networks
256 rows
= self
.db
.query("""
257 WITH known_networks AS (
258 SELECT network FROM announcements
260 SELECT network FROM networks
262 SELECT network FROM network_overrides
265 ordered_networks AS (
267 known_networks.network AS network,
268 announcements.autnum AS autnum,
269 networks.country AS country,
271 -- Must be part of returned values for ORDER BY clause
272 masklen(announcements.network) AS sort_a,
273 masklen(networks.network) AS sort_b
277 announcements ON known_networks.network <<= announcements.network
279 networks ON known_networks.network <<= networks.network
281 known_networks.network,
286 -- Return a list of those networks enriched with all
287 -- other information that we store in the database
289 DISTINCT ON (network)
296 SELECT country FROM network_overrides overrides
297 WHERE networks.network <<= overrides.network
298 ORDER BY masklen(overrides.network) DESC
302 SELECT country FROM autnum_overrides overrides
303 WHERE networks.autnum = overrides.number
311 SELECT is_anonymous_proxy FROM network_overrides overrides
312 WHERE networks.network <<= overrides.network
313 ORDER BY masklen(overrides.network) DESC
317 SELECT is_anonymous_proxy FROM autnum_overrides overrides
318 WHERE networks.autnum = overrides.number
321 ) AS is_anonymous_proxy,
324 SELECT is_satellite_provider FROM network_overrides overrides
325 WHERE networks.network <<= overrides.network
326 ORDER BY masklen(overrides.network) DESC
330 SELECT is_satellite_provider FROM autnum_overrides overrides
331 WHERE networks.autnum = overrides.number
334 ) AS is_satellite_provider,
337 SELECT is_anycast FROM network_overrides overrides
338 WHERE networks.network <<= overrides.network
339 ORDER BY masklen(overrides.network) DESC
343 SELECT is_anycast FROM autnum_overrides overrides
344 WHERE networks.autnum = overrides.number
350 SELECT is_drop FROM network_overrides overrides
351 WHERE networks.network <<= overrides.network
352 ORDER BY masklen(overrides.network) DESC
356 SELECT is_drop FROM autnum_overrides overrides
357 WHERE networks.autnum = overrides.number
362 ordered_networks networks
366 network
= writer
.add_network(row
.network
)
370 network
.country_code
= row
.country
374 network
.asn
= row
.autnum
377 if row
.is_anonymous_proxy
:
378 network
.set_flag(location
.NETWORK_FLAG_ANONYMOUS_PROXY
)
380 if row
.is_satellite_provider
:
381 network
.set_flag(location
.NETWORK_FLAG_SATELLITE_PROVIDER
)
384 network
.set_flag(location
.NETWORK_FLAG_ANYCAST
)
387 network
.set_flag(location
.NETWORK_FLAG_DROP
)
390 log
.info("Writing countries...")
391 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
394 c
= writer
.add_country(row
.country_code
)
395 c
.continent_code
= row
.continent_code
398 # Write everything to file
399 log
.info("Writing database to file...")
403 def handle_update_whois(self
, ns
):
404 downloader
= location
.importer
.Downloader()
406 # Download all sources
407 with self
.db
.transaction():
408 # Create some temporary tables to store parsed data
410 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
412 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
414 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
416 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
418 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
420 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
421 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
424 # Remove all previously imported content
426 TRUNCATE TABLE networks;
429 # Fetch all valid country codes to check parsed networks against...
430 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
434 validcountries
.append(row
.country_code
)
436 for source_key
in location
.importer
.WHOIS_SOURCES
:
437 for single_url
in location
.importer
.WHOIS_SOURCES
[source_key
]:
438 with downloader
.request(single_url
, return_blocks
=True) as f
:
440 self
._parse
_block
(block
, source_key
, validcountries
)
442 # Process all parsed networks from every RIR we happen to have access to,
443 # insert the largest network chunks into the networks table immediately...
444 families
= self
.db
.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
446 for family
in (row
.family
for row
in families
):
447 smallest
= self
.db
.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family
)
449 self
.db
.execute("INSERT INTO networks(network, country, original_countries, source) \
450 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest
.prefix
, family
)
452 # ... determine any other prefixes for this network family, ...
453 prefixes
= self
.db
.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
454 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family
)
456 # ... and insert networks with this prefix in case they provide additional
457 # information (i. e. subnet of a larger chunk with a different country)
458 for prefix
in (row
.prefix
for row
in prefixes
):
464 _rirdata.original_countries,
469 family(_rirdata.network) = %s
471 masklen(_rirdata.network) = %s
475 DISTINCT ON (c.network)
478 c.original_countries,
480 masklen(networks.network),
481 networks.country AS parent_country
487 c.network << networks.network
490 masklen(networks.network) DESC NULLS LAST
493 networks(network, country, original_countries, source)
502 parent_country IS NULL
504 country <> parent_country
505 ON CONFLICT DO NOTHING""",
510 INSERT INTO autnums(number, name, source)
511 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
512 JOIN _organizations ON _autnums.organization = _organizations.handle
513 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
516 # Download all extended sources
517 for source_key
in location
.importer
.EXTENDED_SOURCES
:
518 for single_url
in location
.importer
.EXTENDED_SOURCES
[source_key
]:
519 with self
.db
.transaction():
521 with downloader
.request(single_url
) as f
:
523 self
._parse
_line
(line
, source_key
, validcountries
)
525 # Download and import (technical) AS names from ARIN
526 self
._import
_as
_names
_from
_arin
()
def _check_parsed_network(self, network):
    """
    Decide whether a network parsed from RIR data may be added to the database.

    Networks from both Whois and so-called "extended sources" are rejected
    when they are, or have:

    (a) not globally routable (RFC 1918 space, et al.)
    (b) covering a too large chunk of the IP address space (prefix length
        is < 7 for IPv4 networks, and < 10 for IPv6)
    (c) "0.0.0.0" or "::" as a network address
    (d) too small for being publicly announced (prefix length is > 24 for
        IPv4 networks, and > 48 for IPv6)

    This filtering is necessary because of clutter across various RIR
    databases, which causes mismatches and eventually disruptions.

    Returns False in case the network is not suitable for adding it to
    the database, and True otherwise.
    """
    # Anything that is not an ipaddress network object is rejected silently
    if not network or not isinstance(network, (ipaddress.IPv4Network, ipaddress.IPv6Network)):
        return False

    if not network.is_global:
        log.debug("Skipping non-globally routable network: %s", network)
        return False

    if network.version == 4:
        if network.prefixlen < 7:
            log.debug("Skipping too big IP chunk: %s", network)
            return False

        if network.prefixlen > 24:
            log.debug("Skipping network too small to be publicly announced: %s", network)
            return False

        if str(network.network_address) == "0.0.0.0":
            log.debug("Skipping network based on 0.0.0.0: %s", network)
            return False

    elif network.version == 6:
        if network.prefixlen < 10:
            log.debug("Skipping too big IP chunk: %s", network)
            return False

        if network.prefixlen > 48:
            log.debug("Skipping network too small to be publicly announced: %s", network)
            return False

        if str(network.network_address) == "::":
            log.debug("Skipping network based on '::': %s", network)
            return False

    else:
        # This should not happen...
        log.warning("Skipping network of unknown family, this should not happen: %s", network)
        return False

    # In case we have made it here, the network is considered to
    # be suitable for libloc consumption...
    return True
def _check_parsed_asn(self, asn):
    """
    Filter Autonomous System Numbers that are not suitable for the database.

    The number is compared against every (start, end) pair in
    VALID_ASN_RANGES; both bounds are inclusive.

    Returns True if the ASN falls into one of those ranges. Otherwise an
    informational message is logged and False is returned.
    """
    if any(start <= asn <= end for start, end in VALID_ASN_RANGES):
        return True

    log.info("Supplied ASN %s out of publicly routable ASN ranges", asn)
    return False
def _parse_block(self, block, source_key, validcountries = None):
    """
    Dispatch one Whois block to the parser matching its type.

    The type is taken from the prefix of the block's first line:
    "aut-num:", "inetnum:"/"inet6num:" or "organisation:". Only the
    inetnum parser receives validcountries.

    Returns whatever the selected parser returns, or None for an empty
    block or a block of any other type.
    """
    # Nothing to look at; indexing the first line would raise IndexError
    if not block:
        return None

    # Get first line to find out what type of block this is
    line = block[0]

    # aut-num
    if line.startswith("aut-num:"):
        return self._parse_autnum_block(block, source_key)

    # inetnum
    if line.startswith(("inet6num:", "inetnum:")):
        return self._parse_inetnum_block(block, source_key, validcountries)

    # organisation
    if line.startswith("organisation:"):
        return self._parse_org_block(block, source_key)

    return None
620 def _parse_autnum_block(self
, block
, source_key
):
624 key
, val
= split_line(line
)
627 m
= re
.match(r
"^(AS|as)(\d+)", val
)
629 autnum
["asn"] = m
.group(2)
632 autnum
[key
] = val
.upper()
635 # Save the first description line as well...
636 if not key
in autnum
:
640 if not autnum
or not "asn" in autnum
:
643 # Insert a dummy organisation handle into our temporary organisations
644 # table in case the AS does not have an organisation handle set, but
645 # has a description (a quirk often observed in APNIC area), so we can
646 # later display at least some string for this AS.
647 if not "org" in autnum
:
648 if "descr" in autnum
:
649 autnum
["org"] = "LIBLOC-%s-ORGHANDLE" % autnum
.get("asn")
651 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
652 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
653 autnum
.get("org"), autnum
.get("descr"), source_key
,
656 log
.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
660 # Insert into database
661 self
.db
.execute("INSERT INTO _autnums(number, organization, source) \
662 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
663 organization = excluded.organization",
664 autnum
.get("asn"), autnum
.get("org"), source_key
,
667 def _parse_inetnum_block(self
, block
, source_key
, validcountries
= None):
668 log
.debug("Parsing inetnum block:")
675 key
, val
= split_line(line
)
677 # Filter any inetnum records which are only referring to IP space
678 # not managed by that specific RIR...
680 if re
.match(r
"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val
.strip()):
681 log
.debug("Skipping record indicating historic/orphaned data: %s" % val
.strip())
685 start_address
, delim
, end_address
= val
.partition("-")
687 # Strip any excess space
688 start_address
, end_address
= start_address
.rstrip(), end_address
.strip()
690 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
691 if start_address
and not (delim
or end_address
):
693 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
695 start_address
= start_address
.split("/")
696 ldigits
= start_address
[0].count(".")
698 # How many octets do we need to add?
699 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
701 start_address
= start_address
[0] + ".0.0/" + start_address
[1]
703 start_address
= start_address
[0] + ".0/" + start_address
[1]
705 log
.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line
)
709 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
711 log
.warning("Could not parse line in LACNIC DB format: %s" % line
)
714 # Enumerate first and last IP address of this network
715 end_address
= start_address
[-1]
716 start_address
= start_address
[0]
719 # Convert to IP address
721 start_address
= ipaddress
.ip_address(start_address
)
722 end_address
= ipaddress
.ip_address(end_address
)
724 log
.warning("Could not parse line: %s" % line
)
727 inetnum
["inetnum"] = list(ipaddress
.summarize_address_range(start_address
, end_address
))
729 elif key
== "inet6num":
730 inetnum
[key
] = [ipaddress
.ip_network(val
, strict
=False)]
732 elif key
== "country":
735 # Catch RIR data objects with more than one country code...
736 if not key
in inetnum
:
739 if val
in inetnum
.get("country"):
740 # ... but keep this list distinct...
743 # When people set country codes to "UK", they actually mean "GB"
747 inetnum
[key
].append(val
)
750 if not inetnum
or not "country" in inetnum
:
753 # Prepare skipping objects with unknown country codes...
754 invalidcountries
= [singlecountry
for singlecountry
in inetnum
.get("country") if singlecountry
not in validcountries
]
756 # Iterate through all networks enumerated from above, check them for plausibility and insert
757 # them into the database, if _check_parsed_network() succeeded
758 for single_network
in inetnum
.get("inet6num") or inetnum
.get("inetnum"):
759 if self
._check
_parsed
_network
(single_network
):
761 # Skip objects with unknown country codes if they are valid to avoid log spam...
762 if validcountries
and invalidcountries
:
763 log
.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
764 (invalidcountries
, inetnum
.get("country"), inetnum
.get("inet6num") or inetnum
.get("inetnum")))
767 # Everything is fine here, run INSERT statement...
768 self
.db
.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
769 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
770 "%s" % single_network
, inetnum
.get("country")[0], inetnum
.get("country"), source_key
,
773 def _parse_org_block(self
, block
, source_key
):
777 key
, val
= split_line(line
)
779 if key
== "organisation":
780 org
[key
] = val
.upper()
781 elif key
== "org-name":
788 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
789 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
790 UPDATE SET name = excluded.name",
791 org
.get("organisation"), org
.get("org-name"), source_key
,
def _parse_line(self, line, source_key, validcountries = None):
    """
    Parse one line of an "extended source" file.

    A data line has the form "registry|country|type|rest". Lines starting
    with "2" (the version line) or "#" (comments) are ignored, as are lines
    without a country code or with "*" as country code. If validcountries
    is given, lines with a country code not in it are skipped with a warning.

    Only lines of type "ipv4" or "ipv6" are processed further; they are
    handed to _parse_ip_line() with the remainder of the line. Returns
    whatever that call returns, and None for every skipped line.
    """
    # Skip version line
    if line.startswith("2"):
        return

    # Skip comments
    if line.startswith("#"):
        return

    try:
        registry, country_code, record_type, line = line.split("|", 3)
    except ValueError:
        # Fewer than four fields; "line" still holds the complete input here
        log.warning("Could not parse line: %s", line)
        return

    # Skip any lines that are for stats only or do not have a country
    # code at all (avoids log spam below)
    if not country_code or country_code == '*':
        return

    # Skip objects with unknown country codes
    if validcountries and country_code not in validcountries:
        log.warning("Skipping line with bogus country '%s': %s", country_code, line)
        return

    if record_type in ("ipv6", "ipv4"):
        return self._parse_ip_line(country_code, record_type, line, source_key)
def _parse_ip_line(self, country, type, line, source_key):
    """
    Parse the IP part of an "extended source" line and store the network.

    line is expected as "address|prefix|date|status" with an optional fifth
    "organization" field. Only lines whose status is "assigned" or
    "allocated" are considered. For type "ipv4" the second field is a
    number of addresses and is converted into a prefix length.

    Networks accepted by _check_parsed_network() are inserted into the
    networks table; on conflict the stored country is updated.
    Malformed lines are logged and skipped. Returns None.
    """
    fields = line.split("|")

    # Accept the line with or without the trailing organization field
    if len(fields) not in (4, 5):
        log.warning("Unhandled line format: %s", line)
        return

    address, prefix, status = fields[0], fields[1], fields[3]

    # Skip anything that isn't properly assigned
    if status not in ("assigned", "allocated"):
        return

    # Cast prefix into an integer
    try:
        prefix = int(prefix)
    except ValueError:
        log.warning("Invalid prefix: %s", prefix)
        return

    # Fix prefix length for IPv4
    if type == "ipv4":
        # A non-positive address count has no logarithm
        if prefix <= 0:
            log.warning("Invalid prefix: %s", prefix)
            return

        # bit_length() - 1 is floor(log2(n)), computed without floating point
        prefix = 32 - (prefix.bit_length() - 1)

    # Try to parse the address
    try:
        network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
    except ValueError:
        log.warning("Invalid IP address: %s", address)
        return

    if not self._check_parsed_network(network):
        return

    self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
        VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
        UPDATE SET country = excluded.country",
        "%s" % network, country, [country], source_key,
    )
867 def _import_as_names_from_arin(self
):
868 downloader
= location
.importer
.Downloader()
870 # XXX: Download AS names file from ARIN (note that these names appear to be quite
871 # technical, not intended for human consumption, as description fields in
872 # organisation handles for other RIRs are - however, this is what we have got,
873 # and in some cases, it might be still better than nothing)
874 with downloader
.request("https://ftp.arin.net/info/asn.txt", return_blocks
=False) as f
:
876 # Convert binary line to string...
879 # ... valid lines start with a space, followed by the number of the Autonomous System ...
880 if not line
.startswith(" "):
883 # Split line and check if there is a valid ASN in it...
884 asn
, name
= line
.split()[0:2]
889 log
.debug("Skipping ARIN AS names line not containing an integer for ASN")
892 # Filter invalid ASNs...
893 if not self
._check
_parsed
_asn
(asn
):
896 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
897 if re
.match(r
"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name
):
900 # Bail out in case the AS name contains anything we do not expect here...
901 if re
.search(r
"[^a-zA-Z0-9-_]", name
):
902 log
.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
905 # Things look good here, run INSERT statement and skip this one if we already have
906 # a (better?) name for this Autonomous System...
912 ) VALUES (%s, %s, %s)
913 ON CONFLICT (number) DO NOTHING""",
919 def handle_update_announcements(self
, ns
):
920 server
= ns
.server
[0]
922 with self
.db
.transaction():
923 if server
.startswith("/"):
924 self
._handle
_update
_announcements
_from
_bird
(server
)
926 self
._handle
_update
_announcements
_from
_telnet
(server
)
928 # Purge anything we never want here
930 -- Delete default routes
931 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
933 -- Delete anything that is not global unicast address space
934 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
936 -- DELETE "current network" address space
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
939 -- DELETE local loopback address space
940 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
942 -- DELETE RFC 1918 address space
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
947 -- DELETE test, benchmark and documentation address space
948 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
949 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
950 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
951 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
952 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
954 -- DELETE CGNAT address space (RFC 6598)
955 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
957 -- DELETE link local address space
958 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
960 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
961 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
962 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
964 -- DELETE multicast and reserved address space
965 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
966 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
968 -- Delete networks that are too small to be in the global routing table
969 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
970 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
972 -- Delete any non-public or reserved ASNs
973 DELETE FROM announcements WHERE NOT (
974 (autnum >= 1 AND autnum <= 23455)
976 (autnum >= 23457 AND autnum <= 64495)
978 (autnum >= 131072 AND autnum <= 4199999999)
981 -- Delete everything that we have not seen for 14 days
982 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
985 def _handle_update_announcements_from_bird(self
, server
):
986 # Pre-compile the regular expression for faster searching
987 route
= re
.compile(b
"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
989 log
.info("Requesting routing table from Bird (%s)" % server
)
991 aggregated_networks
= []
993 # Send command to list all routes
994 for line
in self
._bird
_cmd
(server
, "show route"):
995 m
= route
.match(line
)
1001 # Ignore any header lines with the name of the routing table
1002 elif line
.startswith(b
"Table"):
1007 log
.debug("Could not parse line: %s" % line
.decode())
1011 # Fetch the extracted network and ASN
1012 network
, autnum
= m
.groups()
1014 # Decode into strings
1016 network
= network
.decode()
1018 autnum
= autnum
.decode()
1020 # Collect all aggregated networks
1022 log
.debug("%s is an aggregated network" % network
)
1023 aggregated_networks
.append(network
)
1026 # Insert it into the database
1027 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1028 VALUES(%s, %s) ON CONFLICT (network) DO \
1029 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1033 # Process any aggregated networks
1034 for network
in aggregated_networks
:
1035 log
.debug("Processing aggregated network %s" % network
)
1037 # Run "show route all" for each network
1038 for line
in self
._bird
_cmd
(server
, "show route %s all" % network
):
1039 # Try finding the path
1040 m
= re
.match(b
"\s+BGP\.as_path:.* (\d+) {\d+}$", line
)
1042 # Select the last AS number in the path
1043 autnum
= m
.group(1).decode()
1045 # Insert it into the database
1046 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1047 VALUES(%s, %s) ON CONFLICT (network) DO \
1048 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1052 # We don't need to process any more
1055 def _handle_update_announcements_from_telnet(self
, server
):
1056 # Pre-compile regular expression for routes
1057 route
= re
.compile(b
"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re
.MULTILINE|re
.DOTALL
)
1059 with telnetlib
.Telnet(server
) as t
:
1062 # t.set_debuglevel(10)
1064 # Wait for console greeting
1065 greeting
= t
.read_until(b
"> ", timeout
=30)
1067 log
.error("Could not get a console prompt")
1070 # Disable pagination
1071 t
.write(b
"terminal length 0\n")
1073 # Wait for the prompt to return
1076 # Fetch the routing tables
1077 for protocol
in ("ipv6", "ipv4"):
1078 log
.info("Requesting %s routing table" % protocol
)
1080 # Request the full unicast routing table
1081 t
.write(b
"show bgp %s unicast\n" % protocol
.encode())
1083 # Read entire header which ends with "Path"
1084 t
.read_until(b
"Path\r\n")
1087 # Try reading a full entry
1088 # Those might be broken across multiple lines but ends with i
1089 line
= t
.read_until(b
"i\r\n", timeout
=5)
1093 # Show line for debugging
1094 #log.debug(repr(line))
1096 # Try finding a route in here
1097 m
= route
.match(line
)
1099 network
, autnum
= m
.groups()
1101 # Convert network to string
1102 network
= network
.decode()
1104 # Append /24 for IPv4 addresses
1105 if not "/" in network
and not ":" in network
:
1106 network
= "%s/24" % network
1108 # Convert AS number to integer
1109 autnum
= int(autnum
)
1111 log
.info("Found announcement for %s by %s" % (network
, autnum
))
1113 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1114 VALUES(%s, %s) ON CONFLICT (network) DO \
1115 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1119 log
.info("Finished reading the %s routing table" % protocol
)
def _bird_cmd(self, socket_path, command):
    """
    Send a command to Bird and yield the lines of its reply.

    This is a generator. It connects to the UNIX stream socket at
    ``socket_path``, sends ``command`` and yields the reply line by line
    as bytes (each including its trailing newline). For status lines, the
    four-digit code and the delimiter are removed before the remainder is
    yielded. Iteration ends on a status line with code 0 or when the peer
    closes the connection.
    """
    log.debug("Sending Bird command: %s", command)

    # The context manager closes the socket in every case, including when
    # the consumer stops iterating early
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
        s.connect(socket_path)

        # sendall() keeps writing until the whole command has been sent
        s.sendall(b"%s\n" % command.encode())

        # Allocate some buffer
        buffer = b""

        while True:
            chunk = s.recv(4096)

            # An empty read means the peer has closed the connection;
            # without this check we would spin here forever
            if not chunk:
                return

            # Fill up the buffer
            buffer += chunk

            while True:
                # Search for the next newline
                pos = buffer.find(b"\n")

                # If we cannot find one, we go back and read more data
                if pos < 0:
                    break

                # Cut after the newline character
                pos += 1

                # Split the line we want and keep the rest in buffer
                line, buffer = buffer[:pos], buffer[pos:]

                # Try parsing any status lines: four digits followed by
                # a space (32) or a dash (45)
                if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
                    code, line = int(line[:4]), line[5:]

                    log.debug("Received response code %s from bird", code)

                    # End of output
                    if code == 0:
                        return

                    # Ignore hello line
                    if code == 1:
                        continue

                # Otherwise return the line
                yield line
def handle_update_overrides(self, ns):
    """
    Rebuild the override tables.

    Inside one database transaction this drops all overrides with the
    source "manual", refreshes the overrides derived from the Amazon AWS
    and Spamhaus DROP feeds, and then reads every file in ``ns.files``.
    "net" and "aut-num" blocks found in those files are inserted with the
    source "manual"; blocks of any other type are logged and skipped.
    """
    with self.db.transaction():
        # Only drop manually created overrides, as we can be reasonably sure to have them,
        # and preserve the rest. If appropriate, it is deleted by correspondent functions.
        self.db.execute("""
            DELETE FROM autnum_overrides WHERE source = 'manual';
            DELETE FROM network_overrides WHERE source = 'manual';
        """)

        # Update overrides for various cloud providers big enough to publish their own IP
        # network allocation lists in a machine-readable format...
        self._update_overrides_for_aws()

        # Update overrides for Spamhaus DROP feeds...
        self._update_overrides_for_spamhaus_drop()

        for file in ns.files:
            log.info("Reading %s...", file)

            with open(file, "rb") as f:
                # "kind" instead of "type" so that the builtin is not shadowed
                for kind, block in location.importer.read_blocks(f):
                    if kind == "net":
                        network = block.get("net")

                        # Try to parse and normalise the network
                        try:
                            network = ipaddress.ip_network(network, strict=False)
                        except ValueError as e:
                            log.warning("Invalid IP network: %s: %s", network, e)
                            continue

                        # Prevent that we overwrite all networks
                        if network.prefixlen == 0:
                            log.warning("Skipping %s: You cannot overwrite default", network)
                            continue

                        self.db.execute("""
                            INSERT INTO network_overrides(
                                network,
                                country,
                                source,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast,
                                is_drop
                            ) VALUES (%s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT (network) DO NOTHING""",
                            "%s" % network,
                            block.get("country"),
                            "manual",
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                            self._parse_bool(block, "drop"),
                        )

                    elif kind == "aut-num":
                        autnum = block.get("aut-num")

                        # Check if AS number begins with "AS"; a missing
                        # value is rejected here as well instead of crashing
                        if not autnum or not autnum.startswith("AS"):
                            log.warning("Invalid AS number: %s", autnum)
                            continue

                        # Strip "AS"
                        autnum = autnum[2:]

                        self.db.execute("""
                            INSERT INTO autnum_overrides(
                                number,
                                name,
                                country,
                                source,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast,
                                is_drop
                            ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT DO NOTHING""",
                            autnum,
                            block.get("name"),
                            block.get("country"),
                            "manual",
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                            self._parse_bool(block, "drop"),
                        )

                    else:
                        log.warning("Unsupported type: %s", kind)
def _update_overrides_for_aws(self):
    """
    Create network overrides from the Amazon AWS IP range feed.

    Downloads ip-ranges.json, removes all existing overrides with the
    source "Amazon AWS IP feed" and inserts one override per published
    IPv4/IPv6 prefix. The country is derived from the AWS region name;
    prefixes of the region "GLOBAL" are flagged as anycast and carry no
    country. Prefixes that cannot be parsed, that fail
    ``_check_parsed_network()`` or whose country cannot be determined
    are skipped. If the download or JSON decoding fails, the database is
    left untouched.
    """
    # Download Amazon AWS IP allocation file to create overrides...
    downloader = location.importer.Downloader()

    try:
        with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
            aws_ip_dump = json.load(f.body)
    except Exception as e:
        log.error("unable to preprocess Amazon AWS IP ranges: %s", e)
        return

    # At this point, we can assume the downloaded file to be valid
    self.db.execute("""
        DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
    """)

    # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
    # there seems to be no machine-readable version available of this other than
    # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
    # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
    # was helpful here as well.
    aws_region_country_map = {
        "af-south-1": "ZA",
        "ap-east-1": "HK",
        "ap-south-1": "IN",
        "ap-south-2": "IN",
        "ap-northeast-3": "JP",
        "ap-northeast-2": "KR",
        "ap-southeast-1": "SG",
        "ap-southeast-2": "AU",
        "ap-southeast-3": "ID",  # Jakarta
        "ap-southeast-4": "AU",
        "ap-northeast-1": "JP",
        "ca-central-1": "CA",
        "eu-central-1": "DE",
        "eu-central-2": "CH",
        "eu-west-1": "IE",
        "eu-west-2": "GB",
        "eu-south-1": "IT",
        "eu-south-2": "ES",
        "eu-west-3": "FR",
        "eu-north-1": "SE",
        "il-central-1": "IL",  # XXX: This one is not documented anywhere except for ip-ranges.json itself
        "me-central-1": "AE",
        "me-south-1": "BH",
        "sa-east-1": "BR",
    }

    # Fetch all valid country codes to check parsed networks against...
    # A set gives constant-time membership tests in the loop below.
    rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
    validcountries = {row.country_code for row in rows}

    # Tolerate a feed that lacks one of the two prefix lists
    prefixes = aws_ip_dump.get("prefixes", []) + aws_ip_dump.get("ipv6_prefixes", [])

    with self.db.transaction():
        for snetwork in prefixes:
            try:
                network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
            except ValueError:
                log.warning("Unable to parse line: %s", snetwork)
                continue

            # Sanitize parsed networks...
            if not self._check_parsed_network(network):
                continue

            # Determine region of this network; an entry without a region
            # ends up in the "unable to determine" branch below
            region = snetwork.get("region") or ""
            cc = None
            is_anycast = False

            # Any region name starting with "us-" will get "US" country code assigned straight away...
            if region.startswith("us-"):
                cc = "US"
            elif region.startswith("cn-"):
                # ... same goes for China ...
                cc = "CN"
            elif region == "GLOBAL":
                # ... funny region name for anycast-like networks ...
                is_anycast = True
            elif region in aws_region_country_map:
                # ... assign looked up country code otherwise ...
                cc = aws_region_country_map[region]
            else:
                # ... and bail out if we are missing something here
                log.warning("Unable to determine country code for line: %s", snetwork)
                continue

            # Skip networks with unknown country codes. The check is only
            # performed if the countries table has any content at all.
            if not is_anycast and validcountries and cc not in validcountries:
                log.warning("Skipping Amazon AWS network with bogus country '%s': %s",
                    cc, network)
                continue

            # Conduct SQL statement...
            self.db.execute("""
                INSERT INTO network_overrides(
                    network,
                    country,
                    source,
                    is_anonymous_proxy,
                    is_satellite_provider,
                    is_anycast
                ) VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (network) DO NOTHING""",
                "%s" % network,
                cc,
                "Amazon AWS IP feed",
                None,
                None,
                is_anycast,
            )
def _update_overrides_for_spamhaus_drop(self):
    """
    Create overrides from the Spamhaus DROP feeds.

    Networks listed in the DROP, EDROP and DROPv6 feeds and ASNs listed
    in the ASN-DROP feed are stored with ``is_drop`` set. All feeds of
    one kind are downloaded before the previously imported rows of that
    kind are removed, so that the rows of one feed are not wiped again
    while the next feed is being processed.

    If a download fails, an error is logged and the method returns
    without processing any further feeds. IP feeds with ten lines or
    fewer are considered bogus and ignored.
    """
    downloader = location.importer.Downloader()

    ip_urls = (
        "https://www.spamhaus.org/drop/drop.txt",
        "https://www.spamhaus.org/drop/edrop.txt",
        "https://www.spamhaus.org/drop/dropv6.txt",
    )

    asn_urls = (
        "https://www.spamhaus.org/drop/asndrop.txt",
    )

    def download(url):
        # Returns the lines of the feed as a list of bytes, or None on failure
        try:
            with downloader.request(url, return_blocks=False) as f:
                return f.body.readlines()
        except Exception as e:
            log.error("Unable to download Spamhaus DROP URL %s: %s", url, e)
            return None

    def entries(lines):
        # Yields (decoded line, first token) for every line carrying data
        for raw in lines:
            # The response is assumed to be encoded in UTF-8...
            sline = raw.decode("utf-8")

            # Comments start with a semicolon...
            if sline.startswith(";"):
                continue

            # Blank lines carry no data (and have no first token)
            fields = sline.split()
            if fields:
                yield sline, fields[0]

    # Download all IP feeds first
    ip_feeds = []
    for url in ip_urls:
        fcontent = download(url)
        if fcontent is None:
            return

        # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
        # lists.
        if len(fcontent) > 10:
            ip_feeds.append((url, fcontent))
        else:
            log.error("Spamhaus DROP URL %s returned likely bogus file, ignored", url)

    # Only replace what we have if there is at least one usable feed
    if ip_feeds:
        with self.db.transaction():
            self.db.execute(
                "DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists'")

            # Iterate through every line, filter comments and add remaining networks to
            # the override table in case they are valid...
            for url, fcontent in ip_feeds:
                for sline, token in entries(fcontent):
                    # Extract network and ignore anything afterwards...
                    try:
                        network = ipaddress.ip_network(token, strict=False)
                    except ValueError:
                        log.error("Unable to parse line: %s", sline)
                        continue

                    # Sanitize parsed networks...
                    if not self._check_parsed_network(network):
                        log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s",
                            url, network)
                        continue

                    # Conduct SQL statement...
                    self.db.execute("""
                        INSERT INTO network_overrides(
                            network,
                            source,
                            is_drop
                        ) VALUES (%s, %s, %s)
                        ON CONFLICT (network) DO UPDATE SET is_drop = True""",
                        "%s" % network,
                        "Spamhaus DROP lists",
                        True
                    )

    # Download all ASN feeds before removing the ASNs imported earlier
    asn_feeds = []
    for url in asn_urls:
        fcontent = download(url)
        if fcontent is None:
            return

        asn_feeds.append((url, fcontent))

    with self.db.transaction():
        self.db.execute(
            "DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list'")

        # Iterate through every line, filter comments and add remaining ASNs to
        # the override table in case they are valid...
        for url, fcontent in asn_feeds:
            for sline, token in entries(fcontent):
                # Remove the "AS" prefix. str.strip("AS") would treat its
                # argument as a set of characters instead.
                if token.startswith("AS"):
                    token = token[2:]

                # ... and convert it into an integer. Voila.
                try:
                    asn = int(token)
                except ValueError:
                    log.error("Unable to parse line: %s", sline)
                    continue

                # Filter invalid ASNs...
                if not self._check_parsed_asn(asn):
                    log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s",
                        url, asn)
                    continue

                # Conduct SQL statement...
                self.db.execute("""
                    INSERT INTO autnum_overrides(
                        number,
                        source,
                        is_drop
                    ) VALUES (%s, %s, %s)
                    ON CONFLICT (number) DO UPDATE SET is_drop = True""",
                    "%s" % asn,
                    "Spamhaus ASN-DROP list",
                    True
                )
1494 def _parse_bool(block
, key
):
1495 val
= block
.get(key
)
1497 # There is no point to proceed when we got None
1501 # Convert to lowercase
1505 if val
in ("yes", "1"):
1509 if val
in ("no", "0"):
def handle_import_countries(self, ns):
    """
    Replace the content of the ``countries`` table.

    Inside one database transaction the table is truncated and refilled
    from every file object in ``ns.file``. Each data line is expected to
    consist of a country code, a continent code and the country name,
    separated by whitespace; the name may contain whitespace itself.
    Blank lines and lines starting with "#" are ignored. Lines with too
    few fields are logged and skipped.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("TRUNCATE TABLE countries")

        for file in ns.file:
            for line in file:
                line = line.rstrip()

                # Ignore blank lines and any comments
                if not line or line.startswith("#"):
                    continue

                # Splitting at most twice keeps names with spaces intact.
                # Unpacking fails with ValueError on fewer than three fields.
                try:
                    country_code, continent_code, name = line.split(maxsplit=2)
                except ValueError:
                    log.warning("Could not parse line: %s", line)
                    continue

                self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
                    VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
def split_line(line):
    """
    Split a "key: value" line at the first colon.

    Returns a (key, value) tuple with surrounding whitespace removed from
    both parts. If the line contains no colon, the value is an empty
    string.
    """
    key, _, val = line.partition(":")

    # Strip any excess space
    return key.strip(), val.strip()
1548 # Run the command line interface