2 ###############################################################################
4 # libloc - A library to determine the location of someone on the Internet #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
18 ###############################################################################
31 # Load our location module
33 import location
.database
34 import location
.importer
35 from location
.i18n
import _
38 log
= logging
.getLogger("location.importer")
51 parser
= argparse
.ArgumentParser(
52 description
=_("Location Importer Command Line Interface"),
54 subparsers
= parser
.add_subparsers()
56 # Global configuration flags
57 parser
.add_argument("--debug", action
="store_true",
58 help=_("Enable debug output"))
59 parser
.add_argument("--quiet", action
="store_true",
60 help=_("Enable quiet mode"))
63 parser
.add_argument("--version", action
="version",
64 version
="%(prog)s @VERSION@")
67 parser
.add_argument("--database-host", required
=True,
68 help=_("Database Hostname"), metavar
=_("HOST"))
69 parser
.add_argument("--database-name", required
=True,
70 help=_("Database Name"), metavar
=_("NAME"))
71 parser
.add_argument("--database-username", required
=True,
72 help=_("Database Username"), metavar
=_("USERNAME"))
73 parser
.add_argument("--database-password", required
=True,
74 help=_("Database Password"), metavar
=_("PASSWORD"))
77 write
= subparsers
.add_parser("write", help=_("Write database to file"))
78 write
.set_defaults(func
=self
.handle_write
)
79 write
.add_argument("file", nargs
=1, help=_("Database File"))
80 write
.add_argument("--signing-key", nargs
="?", type=open, help=_("Signing Key"))
81 write
.add_argument("--backup-signing-key", nargs
="?", type=open, help=_("Backup Signing Key"))
82 write
.add_argument("--vendor", nargs
="?", help=_("Sets the vendor"))
83 write
.add_argument("--description", nargs
="?", help=_("Sets a description"))
84 write
.add_argument("--license", nargs
="?", help=_("Sets the license"))
85 write
.add_argument("--version", type=int, help=_("Database Format Version"))
88 update_whois
= subparsers
.add_parser("update-whois", help=_("Update WHOIS Information"))
89 update_whois
.set_defaults(func
=self
.handle_update_whois
)
91 # Update announcements
92 update_announcements
= subparsers
.add_parser("update-announcements",
93 help=_("Update BGP Annoucements"))
94 update_announcements
.set_defaults(func
=self
.handle_update_announcements
)
95 update_announcements
.add_argument("server", nargs
=1,
96 help=_("Route Server to connect to"), metavar
=_("SERVER"))
99 update_overrides
= subparsers
.add_parser("update-overrides",
100 help=_("Update overrides"),
102 update_overrides
.add_argument(
103 "files", nargs
="+", help=_("Files to import"),
105 update_overrides
.set_defaults(func
=self
.handle_update_overrides
)
108 import_countries
= subparsers
.add_parser("import-countries",
109 help=_("Import countries"),
111 import_countries
.add_argument("file", nargs
=1, type=argparse
.FileType("r"),
112 help=_("File to import"))
113 import_countries
.set_defaults(func
=self
.handle_import_countries
)
115 args
= parser
.parse_args()
119 location
.logger
.set_level(logging
.DEBUG
)
121 location
.logger
.set_level(logging
.WARNING
)
123 # Print usage if no action was given
124 if not "func" in args
:
131 # Parse command line arguments
132 args
= self
.parse_cli()
134 # Initialise database
135 self
.db
= self
._setup
_database
(args
)
138 ret
= args
.func(args
)
140 # Return with exit code
144 # Otherwise just exit
147 def _setup_database(self
, ns
):
149 Initialise the database
151 # Connect to database
152 db
= location
.database
.Connection(
153 host
=ns
.database_host
, database
=ns
.database_name
,
154 user
=ns
.database_username
, password
=ns
.database_password
,
157 with db
.transaction():
160 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
161 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
162 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
163 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
164 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
165 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
168 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
169 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
170 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
173 CREATE TABLE IF NOT EXISTS countries(
174 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
175 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
178 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
180 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
181 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
182 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
183 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
186 CREATE TABLE IF NOT EXISTS autnum_overrides(
187 number bigint NOT NULL,
190 is_anonymous_proxy boolean,
191 is_satellite_provider boolean,
194 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
195 ON autnum_overrides(number);
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
197 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
199 CREATE TABLE IF NOT EXISTS network_overrides(
200 network inet NOT NULL,
202 is_anonymous_proxy boolean,
203 is_satellite_provider boolean,
206 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
207 ON network_overrides(network);
208 CREATE INDEX IF NOT EXISTS network_overrides_search
209 ON network_overrides USING GIST(network inet_ops);
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
211 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
216 def handle_write(self
, ns
):
218 Compiles a database in libloc format out of what is in the database
221 writer
= location
.Writer(ns
.signing_key
, ns
.backup_signing_key
)
225 writer
.vendor
= ns
.vendor
228 writer
.description
= ns
.description
231 writer
.license
= ns
.license
233 # Add all Autonomous Systems
234 log
.info("Writing Autonomous Systems...")
236 # Select all ASes with a name
237 rows
= self
.db
.query("""
239 autnums.number AS number,
241 (SELECT overrides.name FROM autnum_overrides overrides
242 WHERE overrides.number = autnums.number),
246 WHERE name <> %s ORDER BY number
250 a
= writer
.add_as(row
.number
)
254 log
.info("Writing networks...")
256 # Select all known networks
257 rows
= self
.db
.query("""
258 WITH known_networks AS (
259 SELECT network FROM announcements
261 SELECT network FROM networks
263 SELECT network FROM network_overrides
266 ordered_networks AS (
268 known_networks.network AS network,
269 announcements.autnum AS autnum,
270 networks.country AS country,
272 -- Must be part of returned values for ORDER BY clause
273 masklen(announcements.network) AS sort_a,
274 masklen(networks.network) AS sort_b
278 announcements ON known_networks.network <<= announcements.network
280 networks ON known_networks.network <<= networks.network
282 known_networks.network,
287 -- Return a list of those networks enriched with all
288 -- other information that we store in the database
290 DISTINCT ON (network)
297 SELECT country FROM network_overrides overrides
298 WHERE networks.network <<= overrides.network
299 ORDER BY masklen(overrides.network) DESC
303 SELECT country FROM autnum_overrides overrides
304 WHERE networks.autnum = overrides.number
312 SELECT is_anonymous_proxy FROM network_overrides overrides
313 WHERE networks.network <<= overrides.network
314 ORDER BY masklen(overrides.network) DESC
318 SELECT is_anonymous_proxy FROM autnum_overrides overrides
319 WHERE networks.autnum = overrides.number
322 ) AS is_anonymous_proxy,
325 SELECT is_satellite_provider FROM network_overrides overrides
326 WHERE networks.network <<= overrides.network
327 ORDER BY masklen(overrides.network) DESC
331 SELECT is_satellite_provider FROM autnum_overrides overrides
332 WHERE networks.autnum = overrides.number
335 ) AS is_satellite_provider,
338 SELECT is_anycast FROM network_overrides overrides
339 WHERE networks.network <<= overrides.network
340 ORDER BY masklen(overrides.network) DESC
344 SELECT is_anycast FROM autnum_overrides overrides
345 WHERE networks.autnum = overrides.number
351 SELECT is_drop FROM network_overrides overrides
352 WHERE networks.network <<= overrides.network
353 ORDER BY masklen(overrides.network) DESC
357 SELECT is_drop FROM autnum_overrides overrides
358 WHERE networks.autnum = overrides.number
363 ordered_networks networks
367 network
= writer
.add_network(row
.network
)
371 network
.country_code
= row
.country
375 network
.asn
= row
.autnum
378 if row
.is_anonymous_proxy
:
379 network
.set_flag(location
.NETWORK_FLAG_ANONYMOUS_PROXY
)
381 if row
.is_satellite_provider
:
382 network
.set_flag(location
.NETWORK_FLAG_SATELLITE_PROVIDER
)
385 network
.set_flag(location
.NETWORK_FLAG_ANYCAST
)
388 network
.set_flag(location
.NETWORK_FLAG_DROP
)
391 log
.info("Writing countries...")
392 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
395 c
= writer
.add_country(row
.country_code
)
396 c
.continent_code
= row
.continent_code
399 # Write everything to file
400 log
.info("Writing database to file...")
404 def handle_update_whois(self
, ns
):
405 downloader
= location
.importer
.Downloader()
407 # Did we run successfully?
410 # Fetch all valid country codes to check parsed networks aganist
411 validcountries
= self
.countries
413 # Iterate over all potential sources
414 for source
in sorted(location
.importer
.SOURCES
):
415 with self
.db
.transaction():
416 # Create some temporary tables to store parsed data
418 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
419 organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
420 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
422 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
423 name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
424 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
426 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
427 original_countries text[] NOT NULL, source text NOT NULL)
429 CREATE INDEX _rirdata_search ON _rirdata
430 USING BTREE(family(network), masklen(network));
431 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
434 # Remove all previously imported content
435 self
.db
.execute("DELETE FROM networks WHERE source = %s", source
)
438 # Fetch WHOIS sources
439 for url
in location
.importer
.WHOIS_SOURCES
.get(source
, []):
440 for block
in downloader
.request_blocks(url
):
441 self
._parse
_block
(block
, source
, validcountries
)
443 # Fetch extended sources
444 for url
in location
.importer
.EXTENDED_SOURCES
.get(source
, []):
445 for line
in downloader
.request_lines(url
):
446 self
._parse
_line
(line
, source
, validcountries
)
447 except urllib
.error
.URLError
as e
:
448 log
.error("Could not retrieve data from %s: %s" % (source
, e
))
451 # Continue with the next source
454 # Process all parsed networks from every RIR we happen to have access to,
455 # insert the largest network chunks into the networks table immediately...
456 families
= self
.db
.query("SELECT DISTINCT family(network) AS family FROM _rirdata \
457 ORDER BY family(network)")
459 for family
in (row
.family
for row
in families
):
460 # Fetch the smallest mask length in our data set
461 smallest
= self
.db
.get("""
469 family(network) = %s""",
491 masklen(network) = %s
500 # ... determine any other prefixes for this network family, ...
501 prefixes
= self
.db
.query("""
503 DISTINCT masklen(network) AS prefix
514 # ... and insert networks with this prefix in case they provide additional
515 # information (i. e. subnet of a larger chunk with a different country)
516 for prefix
in (row
.prefix
for row
in prefixes
):
522 _rirdata.original_countries,
527 family(_rirdata.network) = %s
529 masklen(_rirdata.network) = %s
533 DISTINCT ON (c.network)
536 c.original_countries,
538 masklen(networks.network),
539 networks.country AS parent_country
545 c.network << networks.network
548 masklen(networks.network) DESC NULLS LAST
551 networks(network, country, original_countries, source)
560 parent_country IS NULL
562 country <> parent_country
563 ON CONFLICT DO NOTHING""",
568 INSERT INTO autnums(number, name, source)
569 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
570 JOIN _organizations ON _autnums.organization = _organizations.handle
571 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
574 # Download and import (technical) AS names from ARIN
575 with self
.db
.transaction():
576 self
._import
_as
_names
_from
_arin
()
578 # Return a non-zero exit code for errors
579 return 1 if error
else 0
581 def _check_parsed_network(self
, network
):
583 Assistive function to detect and subsequently sort out parsed
584 networks from RIR data (both Whois and so-called "extended sources"),
587 (a) not globally routable (RFC 1918 space, et al.)
588 (b) covering a too large chunk of the IP address space (prefix length
589 is < 7 for IPv4 networks, and < 10 for IPv6)
590 (c) "0.0.0.0" or "::" as a network address
591 (d) are too small for being publicly announced (we have decided not to
592 process them at the moment, as they significantly enlarge our
593 database without providing very helpful additional information)
595 This unfortunately is necessary due to brain-dead clutter across
596 various RIR databases, causing mismatches and eventually disruptions.
598 We will return False in case a network is not suitable for adding
599 it to our database, and True otherwise.
602 if not network
or not (isinstance(network
, ipaddress
.IPv4Network
) or isinstance(network
, ipaddress
.IPv6Network
)):
605 if not network
.is_global
:
606 log
.debug("Skipping non-globally routable network: %s" % network
)
609 if network
.version
== 4:
610 if network
.prefixlen
< 7:
611 log
.debug("Skipping too big IP chunk: %s" % network
)
614 if network
.prefixlen
> 24:
615 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
618 if str(network
.network_address
) == "0.0.0.0":
619 log
.debug("Skipping network based on 0.0.0.0: %s" % network
)
622 elif network
.version
== 6:
623 if network
.prefixlen
< 10:
624 log
.debug("Skipping too big IP chunk: %s" % network
)
627 if network
.prefixlen
> 48:
628 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
631 if str(network
.network_address
) == "::":
632 log
.debug("Skipping network based on '::': %s" % network
)
636 # This should not happen...
637 log
.warning("Skipping network of unknown family, this should not happen: %s" % network
)
640 # In case we have made it here, the network is considered to
641 # be suitable for libloc consumption...
644 def _check_parsed_asn(self
, asn
):
646 Assistive function to filter Autonomous System Numbers not being suitable
647 for adding to our database. Returns False in such cases, and True otherwise.
650 for start
, end
in VALID_ASN_RANGES
:
651 if start
<= asn
and end
>= asn
:
654 log
.info("Supplied ASN %s out of publicly routable ASN ranges" % asn
)
657 def _parse_block(self
, block
, source_key
, validcountries
= None):
658 # Get first line to find out what type of block this is
662 if line
.startswith("aut-num:"):
663 return self
._parse
_autnum
_block
(block
, source_key
)
666 if line
.startswith("inet6num:") or line
.startswith("inetnum:"):
667 return self
._parse
_inetnum
_block
(block
, source_key
, validcountries
)
670 elif line
.startswith("organisation:"):
671 return self
._parse
_org
_block
(block
, source_key
)
673 def _parse_autnum_block(self
, block
, source_key
):
677 key
, val
= split_line(line
)
680 m
= re
.match(r
"^(AS|as)(\d+)", val
)
682 autnum
["asn"] = m
.group(2)
685 autnum
[key
] = val
.upper()
688 # Save the first description line as well...
689 if not key
in autnum
:
693 if not autnum
or not "asn" in autnum
:
696 # Insert a dummy organisation handle into our temporary organisations
697 # table in case the AS does not have an organisation handle set, but
698 # has a description (a quirk often observed in APNIC area), so we can
699 # later display at least some string for this AS.
700 if not "org" in autnum
:
701 if "descr" in autnum
:
702 autnum
["org"] = "LIBLOC-%s-ORGHANDLE" % autnum
.get("asn")
704 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
705 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
706 autnum
.get("org"), autnum
.get("descr"), source_key
,
709 log
.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
713 # Insert into database
714 self
.db
.execute("INSERT INTO _autnums(number, organization, source) \
715 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
716 organization = excluded.organization",
717 autnum
.get("asn"), autnum
.get("org"), source_key
,
720 def _parse_inetnum_block(self
, block
, source_key
, validcountries
= None):
721 log
.debug("Parsing inetnum block:")
728 key
, val
= split_line(line
)
730 # Filter any inetnum records which are only referring to IP space
731 # not managed by that specific RIR...
733 if re
.match(r
"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val
.strip()):
734 log
.debug("Skipping record indicating historic/orphaned data: %s" % val
.strip())
738 start_address
, delim
, end_address
= val
.partition("-")
740 # Strip any excess space
741 start_address
, end_address
= start_address
.rstrip(), end_address
.strip()
743 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
744 if start_address
and not (delim
or end_address
):
746 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
748 start_address
= start_address
.split("/")
749 ldigits
= start_address
[0].count(".")
751 # How many octets do we need to add?
752 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
754 start_address
= start_address
[0] + ".0.0/" + start_address
[1]
756 start_address
= start_address
[0] + ".0/" + start_address
[1]
758 log
.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line
)
762 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
764 log
.warning("Could not parse line in LACNIC DB format: %s" % line
)
767 # Enumerate first and last IP address of this network
768 end_address
= start_address
[-1]
769 start_address
= start_address
[0]
772 # Convert to IP address
774 start_address
= ipaddress
.ip_address(start_address
)
775 end_address
= ipaddress
.ip_address(end_address
)
777 log
.warning("Could not parse line: %s" % line
)
780 inetnum
["inetnum"] = list(ipaddress
.summarize_address_range(start_address
, end_address
))
782 elif key
== "inet6num":
783 inetnum
[key
] = [ipaddress
.ip_network(val
, strict
=False)]
785 elif key
== "country":
788 # Catch RIR data objects with more than one country code...
789 if not key
in inetnum
:
792 if val
in inetnum
.get("country"):
793 # ... but keep this list distinct...
796 # When people set country codes to "UK", they actually mean "GB"
800 inetnum
[key
].append(val
)
803 if not inetnum
or not "country" in inetnum
:
806 # Prepare skipping objects with unknown country codes...
807 invalidcountries
= [singlecountry
for singlecountry
in inetnum
.get("country") if singlecountry
not in validcountries
]
809 # Iterate through all networks enumerated from above, check them for plausibility and insert
810 # them into the database, if _check_parsed_network() succeeded
811 for single_network
in inetnum
.get("inet6num") or inetnum
.get("inetnum"):
812 if self
._check
_parsed
_network
(single_network
):
814 # Skip objects with unknown country codes if they are valid to avoid log spam...
815 if validcountries
and invalidcountries
:
816 log
.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
817 (invalidcountries
, inetnum
.get("country"), inetnum
.get("inet6num") or inetnum
.get("inetnum")))
820 # Everything is fine here, run INSERT statement...
821 self
.db
.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
822 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
823 "%s" % single_network
, inetnum
.get("country")[0], inetnum
.get("country"), source_key
,
826 def _parse_org_block(self
, block
, source_key
):
830 key
, val
= split_line(line
)
832 if key
== "organisation":
833 org
[key
] = val
.upper()
834 elif key
== "org-name":
841 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
842 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
843 UPDATE SET name = excluded.name",
844 org
.get("organisation"), org
.get("org-name"), source_key
,
847 def _parse_line(self
, line
, source_key
, validcountries
= None):
849 if line
.startswith("2"):
853 if line
.startswith("#"):
857 registry
, country_code
, type, line
= line
.split("|", 3)
859 log
.warning("Could not parse line: %s" % line
)
862 # Skip any lines that are for stats only or do not have a country
863 # code at all (avoids log spam below)
864 if not country_code
or country_code
== '*':
867 # Skip objects with unknown country codes
868 if validcountries
and country_code
not in validcountries
:
869 log
.warning("Skipping line with bogus country '%s': %s" % \
870 (country_code
, line
))
873 if type in ("ipv6", "ipv4"):
874 return self
._parse
_ip
_line
(country_code
, type, line
, source_key
)
876 def _parse_ip_line(self
, country
, type, line
, source_key
):
878 address
, prefix
, date
, status
, organization
= line
.split("|")
882 # Try parsing the line without organization
884 address
, prefix
, date
, status
= line
.split("|")
886 log
.warning("Unhandled line format: %s" % line
)
889 # Skip anything that isn't properly assigned
890 if not status
in ("assigned", "allocated"):
893 # Cast prefix into an integer
897 log
.warning("Invalid prefix: %s" % prefix
)
900 # Fix prefix length for IPv4
902 prefix
= 32 - int(math
.log(prefix
, 2))
904 # Try to parse the address
906 network
= ipaddress
.ip_network("%s/%s" % (address
, prefix
), strict
=False)
908 log
.warning("Invalid IP address: %s" % address
)
911 if not self
._check
_parsed
_network
(network
):
914 self
.db
.execute("INSERT INTO networks(network, country, original_countries, source) \
915 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
916 UPDATE SET country = excluded.country",
917 "%s" % network
, country
, [country
], source_key
,
920 def _import_as_names_from_arin(self
):
921 downloader
= location
.importer
.Downloader()
923 # XXX: Download AS names file from ARIN (note that these names appear to be quite
924 # technical, not intended for human consumption, as description fields in
925 # organisation handles for other RIRs are - however, this is what we have got,
926 # and in some cases, it might be still better than nothing)
927 for line
in downloader
.request_lines("https://ftp.arin.net/info/asn.txt"):
928 # Valid lines start with a space, followed by the number of the Autonomous System ...
929 if not line
.startswith(" "):
932 # Split line and check if there is a valid ASN in it...
933 asn
, name
= line
.split()[0:2]
938 log
.debug("Skipping ARIN AS names line not containing an integer for ASN")
941 # Filter invalid ASNs...
942 if not self
._check
_parsed
_asn
(asn
):
945 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
946 if re
.match(r
"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name
):
949 # Bail out in case the AS name contains anything we do not expect here...
950 if re
.search(r
"[^a-zA-Z0-9-_]", name
):
951 log
.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
954 # Things look good here, run INSERT statement and skip this one if we already have
955 # a (better?) name for this Autonomous System...
961 ) VALUES (%s, %s, %s)
962 ON CONFLICT (number) DO NOTHING""",
968 def handle_update_announcements(self
, ns
):
969 server
= ns
.server
[0]
971 with self
.db
.transaction():
972 if server
.startswith("/"):
973 self
._handle
_update
_announcements
_from
_bird
(server
)
975 self
._handle
_update
_announcements
_from
_telnet
(server
)
977 # Purge anything we never want here
979 -- Delete default routes
980 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
982 -- Delete anything that is not global unicast address space
983 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
985 -- DELETE "current network" address space
986 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
988 -- DELETE local loopback address space
989 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
991 -- DELETE RFC 1918 address space
992 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
993 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
994 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
996 -- DELETE test, benchmark and documentation address space
997 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
998 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
999 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
1000 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
1001 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
1003 -- DELETE CGNAT address space (RFC 6598)
1004 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
1006 -- DELETE link local address space
1007 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
1009 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
1010 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
1011 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
1013 -- DELETE multicast and reserved address space
1014 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
1015 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
1017 -- Delete networks that are too small to be in the global routing table
1018 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
1019 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
1021 -- Delete any non-public or reserved ASNs
1022 DELETE FROM announcements WHERE NOT (
1023 (autnum >= 1 AND autnum <= 23455)
1025 (autnum >= 23457 AND autnum <= 64495)
1027 (autnum >= 131072 AND autnum <= 4199999999)
1030 -- Delete everything that we have not seen for 14 days
1031 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
1034 def _handle_update_announcements_from_bird(self
, server
):
1035 # Pre-compile the regular expression for faster searching
1036 route
= re
.compile(b
"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
1038 log
.info("Requesting routing table from Bird (%s)" % server
)
1040 aggregated_networks
= []
1042 # Send command to list all routes
1043 for line
in self
._bird
_cmd
(server
, "show route"):
1044 m
= route
.match(line
)
1050 # Ignore any header lines with the name of the routing table
1051 elif line
.startswith(b
"Table"):
1056 log
.debug("Could not parse line: %s" % line
.decode())
1060 # Fetch the extracted network and ASN
1061 network
, autnum
= m
.groups()
1063 # Decode into strings
1065 network
= network
.decode()
1067 autnum
= autnum
.decode()
1069 # Collect all aggregated networks
1071 log
.debug("%s is an aggregated network" % network
)
1072 aggregated_networks
.append(network
)
1075 # Insert it into the database
1076 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1077 VALUES(%s, %s) ON CONFLICT (network) DO \
1078 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1082 # Process any aggregated networks
1083 for network
in aggregated_networks
:
1084 log
.debug("Processing aggregated network %s" % network
)
1086 # Run "show route all" for each network
1087 for line
in self
._bird
_cmd
(server
, "show route %s all" % network
):
1088 # Try finding the path
1089 m
= re
.match(b
"\s+BGP\.as_path:.* (\d+) {\d+}$", line
)
1091 # Select the last AS number in the path
1092 autnum
= m
.group(1).decode()
1094 # Insert it into the database
1095 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1096 VALUES(%s, %s) ON CONFLICT (network) DO \
1097 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1101 # We don't need to process any more
1104 def _handle_update_announcements_from_telnet(self
, server
):
1105 # Pre-compile regular expression for routes
1106 route
= re
.compile(b
"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re
.MULTILINE|re
.DOTALL
)
1108 with telnetlib
.Telnet(server
) as t
:
1111 # t.set_debuglevel(10)
1113 # Wait for console greeting
1114 greeting
= t
.read_until(b
"> ", timeout
=30)
1116 log
.error("Could not get a console prompt")
1119 # Disable pagination
1120 t
.write(b
"terminal length 0\n")
1122 # Wait for the prompt to return
1125 # Fetch the routing tables
1126 for protocol
in ("ipv6", "ipv4"):
1127 log
.info("Requesting %s routing table" % protocol
)
1129 # Request the full unicast routing table
1130 t
.write(b
"show bgp %s unicast\n" % protocol
.encode())
1132 # Read entire header which ends with "Path"
1133 t
.read_until(b
"Path\r\n")
1136 # Try reading a full entry
1137 # Those might be broken across multiple lines but ends with i
1138 line
= t
.read_until(b
"i\r\n", timeout
=5)
1142 # Show line for debugging
1143 #log.debug(repr(line))
1145 # Try finding a route in here
1146 m
= route
.match(line
)
1148 network
, autnum
= m
.groups()
1150 # Convert network to string
1151 network
= network
.decode()
1153 # Append /24 for IPv4 addresses
1154 if not "/" in network
and not ":" in network
:
1155 network
= "%s/24" % network
1157 # Convert AS number to integer
1158 autnum
= int(autnum
)
1160 log
.info("Found announcement for %s by %s" % (network
, autnum
))
1162 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1163 VALUES(%s, %s) ON CONFLICT (network) DO \
1164 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1168 log
.info("Finished reading the %s routing table" % protocol
)
1170 def _bird_cmd(self
, socket_path
, command
):
1171 # Connect to the socket
1172 s
= socket
.socket(socket
.AF_UNIX
, socket
.SOCK_STREAM
)
1173 s
.connect(socket_path
)
1175 # Allocate some buffer
1178 log
.debug("Sending Bird command: %s" % command
)
1181 s
.send(b
"%s\n" % command
.encode())
1184 # Fill up the buffer
1185 buffer += s
.recv(4096)
1188 # Search for the next newline
1189 pos
= buffer.find(b
"\n")
1191 # If we cannot find one, we go back and read more data
1195 # Cut after the newline character
1198 # Split the line we want and keep the rest in buffer
1199 line
, buffer = buffer[:pos
], buffer[pos
:]
1201 # Try parsing any status lines
1202 if len(line
) > 4 and line
[:4].isdigit() and line
[4] in (32, 45):
1203 code
, delim
, line
= int(line
[:4]), line
[4], line
[5:]
1205 log
.debug("Received response code %s from bird" % code
)
1215 # Otherwise return the line
1218 def handle_update_overrides(self
, ns
):
1219 with self
.db
.transaction():
1220 # Only drop manually created overrides, as we can be reasonably sure to have them,
1221 # and preserve the rest. If appropriate, it is deleted by correspondent functions.
1223 DELETE FROM autnum_overrides WHERE source = 'manual';
1224 DELETE FROM network_overrides WHERE source = 'manual';
1227 # Update overrides for various cloud providers big enough to publish their own IP
1228 # network allocation lists in a machine-readable format...
1229 self
._update
_overrides
_for
_aws
()
1231 # Update overrides for Spamhaus DROP feeds...
1232 self
._update
_overrides
_for
_spamhaus
_drop
()
1234 for file in ns
.files
:
1235 log
.info("Reading %s..." % file)
1237 with
open(file, "rb") as f
:
1238 for type, block
in location
.importer
.read_blocks(f
):
1240 network
= block
.get("net")
1241 # Try to parse and normalise the network
1243 network
= ipaddress
.ip_network(network
, strict
=False)
1244 except ValueError as e
:
1245 log
.warning("Invalid IP network: %s: %s" % (network
, e
))
1248 # Prevent that we overwrite all networks
1249 if network
.prefixlen
== 0:
1250 log
.warning("Skipping %s: You cannot overwrite default" % network
)
1254 INSERT INTO network_overrides(
1259 is_satellite_provider,
1262 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1263 ON CONFLICT (network) DO NOTHING""",
1265 block
.get("country"),
1267 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1268 self
._parse
_bool
(block
, "is-satellite-provider"),
1269 self
._parse
_bool
(block
, "is-anycast"),
1270 self
._parse
_bool
(block
, "drop"),
1273 elif type == "aut-num":
1274 autnum
= block
.get("aut-num")
1276 # Check if AS number begins with "AS"
1277 if not autnum
.startswith("AS"):
1278 log
.warning("Invalid AS number: %s" % autnum
)
1285 INSERT INTO autnum_overrides(
1291 is_satellite_provider,
1294 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1295 ON CONFLICT DO NOTHING""",
1298 block
.get("country"),
1300 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1301 self
._parse
_bool
(block
, "is-satellite-provider"),
1302 self
._parse
_bool
(block
, "is-anycast"),
1303 self
._parse
_bool
(block
, "drop"),
1307 log
.warning("Unsupported type: %s" % type)
1309 def _update_overrides_for_aws(self
):
1310 # Download Amazon AWS IP allocation file to create overrides...
1311 downloader
= location
.importer
.Downloader()
1315 f
= downloader
.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
1317 # Parse downloaded file
1318 aws_ip_dump
= json
.load(f
)
1319 except Exception as e
:
1320 log
.error("unable to preprocess Amazon AWS IP ranges: %s" % e
)
1323 # At this point, we can assume the downloaded file to be valid
1325 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1328 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1329 # there seems to be no machine-readable version available of this other than
1330 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1331 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1332 # was helpful here as well.
1333 aws_region_country_map
= {
1338 "ap-northeast-3": "JP",
1339 "ap-northeast-2": "KR",
1340 "ap-southeast-1": "SG",
1341 "ap-southeast-2": "AU",
1342 "ap-southeast-3": "MY",
1343 "ap-southeast-4": "AU",
1344 "ap-northeast-1": "JP",
1345 "ca-central-1": "CA",
1346 "eu-central-1": "DE",
1347 "eu-central-2": "CH",
1354 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
1355 "me-central-1": "AE",
1360 # Fetch all valid country codes to check parsed networks aganist...
1361 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
1365 validcountries
.append(row
.country_code
)
1367 with self
.db
.transaction():
1368 for snetwork
in aws_ip_dump
["prefixes"] + aws_ip_dump
["ipv6_prefixes"]:
1370 network
= ipaddress
.ip_network(snetwork
.get("ip_prefix") or snetwork
.get("ipv6_prefix"), strict
=False)
1372 log
.warning("Unable to parse line: %s" % snetwork
)
1375 # Sanitize parsed networks...
1376 if not self
._check
_parsed
_network
(network
):
1379 # Determine region of this network...
1380 region
= snetwork
["region"]
1384 # Any region name starting with "us-" will get "US" country code assigned straight away...
1385 if region
.startswith("us-"):
1387 elif region
.startswith("cn-"):
1388 # ... same goes for China ...
1390 elif region
== "GLOBAL":
1391 # ... funny region name for anycast-like networks ...
1393 elif region
in aws_region_country_map
:
1394 # ... assign looked up country code otherwise ...
1395 cc
= aws_region_country_map
[region
]
1397 # ... and bail out if we are missing something here
1398 log
.warning("Unable to determine country code for line: %s" % snetwork
)
1401 # Skip networks with unknown country codes
1402 if not is_anycast
and validcountries
and cc
not in validcountries
:
1403 log
.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1407 # Conduct SQL statement...
1409 INSERT INTO network_overrides(
1414 is_satellite_provider,
1416 ) VALUES (%s, %s, %s, %s, %s, %s)
1417 ON CONFLICT (network) DO NOTHING""",
1420 "Amazon AWS IP feed",
1427 def _update_overrides_for_spamhaus_drop(self
):
1428 downloader
= location
.importer
.Downloader()
1431 "https://www.spamhaus.org/drop/drop.txt",
1432 "https://www.spamhaus.org/drop/edrop.txt",
1433 "https://www.spamhaus.org/drop/dropv6.txt"
1437 "https://www.spamhaus.org/drop/asndrop.txt"
1442 f
= downloader
.retrieve(url
)
1445 fcontent
= f
.readlines()
1447 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1449 if len(fcontent
) > 10:
1451 DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
1452 DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
1455 log
.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url
)
1458 # Iterate through every line, filter comments and add remaining networks to
1459 # the override table in case they are valid...
1460 with self
.db
.transaction():
1461 for sline
in fcontent
:
1462 # The response is assumed to be encoded in UTF-8...
1463 sline
= sline
.decode("utf-8")
1465 # Comments start with a semicolon...
1466 if sline
.startswith(";"):
1469 # Extract network and ignore anything afterwards...
1471 network
= ipaddress
.ip_network(sline
.split()[0], strict
=False)
1473 log
.error("Unable to parse line: %s" % sline
)
1476 # Sanitize parsed networks...
1477 if not self
._check
_parsed
_network
(network
):
1478 log
.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1482 # Conduct SQL statement...
1484 INSERT INTO network_overrides(
1488 ) VALUES (%s, %s, %s)
1489 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1491 "Spamhaus DROP lists",
1495 for url
in asn_urls
:
1497 f
= downloader
.retrieve(url
)
1499 # Iterate through every line, filter comments and add remaining ASNs to
1500 # the override table in case they are valid...
1501 with self
.db
.transaction():
1502 for sline
in f
.readlines():
1503 # The response is assumed to be encoded in UTF-8...
1504 sline
= sline
.decode("utf-8")
1506 # Comments start with a semicolon...
1507 if sline
.startswith(";"):
1510 # Throw away anything after the first space...
1511 sline
= sline
.split()[0]
1513 # ... strip the "AS" prefix from it ...
1514 sline
= sline
.strip("AS")
1516 # ... and convert it into an integer. Voila.
1519 # Filter invalid ASNs...
1520 if not self
._check
_parsed
_asn
(asn
):
1521 log
.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1525 # Conduct SQL statement...
1527 INSERT INTO autnum_overrides(
1531 ) VALUES (%s, %s, %s)
1532 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1534 "Spamhaus ASN-DROP list",
1539 def _parse_bool(block
, key
):
1540 val
= block
.get(key
)
1542 # There is no point to proceed when we got None
1546 # Convert to lowercase
1550 if val
in ("yes", "1"):
1554 if val
in ("no", "0"):
1561 def countries(self
):
1562 # Fetch all valid country codes to check parsed networks aganist
1563 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
1565 # Return all countries
1566 return [row
.country_code
for row
in rows
]
1568 def handle_import_countries(self
, ns
):
1569 with self
.db
.transaction():
1570 # Drop all data that we have
1571 self
.db
.execute("TRUNCATE TABLE countries")
1573 for file in ns
.file:
1575 line
= line
.rstrip()
1577 # Ignore any comments
1578 if line
.startswith("#"):
1582 country_code
, continent_code
, name
= line
.split(maxsplit
=2)
1584 log
.warning("Could not parse line: %s" % line
)
1587 self
.db
.execute("INSERT INTO countries(country_code, name, continent_code) \
1588 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code
, name
, continent_code
)
1591 def split_line(line
):
1592 key
, colon
, val
= line
.partition(":")
1594 # Strip any excess space
1601 # Run the command line interface