2 ###############################################################################
4 # libloc - A library to determine the location of someone on the Internet #
6 # Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
18 ###############################################################################
30 # Load our location module
32 import location
.database
33 import location
.importer
34 from location
.i18n
import _
37 log
= logging
.getLogger("location.importer")
50 parser
= argparse
.ArgumentParser(
51 description
=_("Location Importer Command Line Interface"),
53 subparsers
= parser
.add_subparsers()
55 # Global configuration flags
56 parser
.add_argument("--debug", action
="store_true",
57 help=_("Enable debug output"))
58 parser
.add_argument("--quiet", action
="store_true",
59 help=_("Enable quiet mode"))
62 parser
.add_argument("--version", action
="version",
63 version
="%(prog)s @VERSION@")
66 parser
.add_argument("--database-host", required
=True,
67 help=_("Database Hostname"), metavar
=_("HOST"))
68 parser
.add_argument("--database-name", required
=True,
69 help=_("Database Name"), metavar
=_("NAME"))
70 parser
.add_argument("--database-username", required
=True,
71 help=_("Database Username"), metavar
=_("USERNAME"))
72 parser
.add_argument("--database-password", required
=True,
73 help=_("Database Password"), metavar
=_("PASSWORD"))
76 write
= subparsers
.add_parser("write", help=_("Write database to file"))
77 write
.set_defaults(func
=self
.handle_write
)
78 write
.add_argument("file", nargs
=1, help=_("Database File"))
79 write
.add_argument("--signing-key", nargs
="?", type=open, help=_("Signing Key"))
80 write
.add_argument("--backup-signing-key", nargs
="?", type=open, help=_("Backup Signing Key"))
81 write
.add_argument("--vendor", nargs
="?", help=_("Sets the vendor"))
82 write
.add_argument("--description", nargs
="?", help=_("Sets a description"))
83 write
.add_argument("--license", nargs
="?", help=_("Sets the license"))
84 write
.add_argument("--version", type=int, help=_("Database Format Version"))
87 update_whois
= subparsers
.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois
.set_defaults(func
=self
.handle_update_whois
)
90 # Update announcements
91 update_announcements
= subparsers
.add_parser("update-announcements",
92 help=_("Update BGP Annoucements"))
93 update_announcements
.set_defaults(func
=self
.handle_update_announcements
)
94 update_announcements
.add_argument("server", nargs
=1,
95 help=_("Route Server to connect to"), metavar
=_("SERVER"))
98 update_overrides
= subparsers
.add_parser("update-overrides",
99 help=_("Update overrides"),
101 update_overrides
.add_argument(
102 "files", nargs
="+", help=_("Files to import"),
104 update_overrides
.set_defaults(func
=self
.handle_update_overrides
)
107 import_countries
= subparsers
.add_parser("import-countries",
108 help=_("Import countries"),
110 import_countries
.add_argument("file", nargs
=1, type=argparse
.FileType("r"),
111 help=_("File to import"))
112 import_countries
.set_defaults(func
=self
.handle_import_countries
)
114 args
= parser
.parse_args()
118 location
.logger
.set_level(logging
.DEBUG
)
120 location
.logger
.set_level(logging
.WARNING
)
122 # Print usage if no action was given
123 if not "func" in args
:
130 # Parse command line arguments
131 args
= self
.parse_cli()
133 # Initialise database
134 self
.db
= self
._setup
_database
(args
)
137 ret
= args
.func(args
)
139 # Return with exit code
143 # Otherwise just exit
146 def _setup_database(self
, ns
):
148 Initialise the database
150 # Connect to database
151 db
= location
.database
.Connection(
152 host
=ns
.database_host
, database
=ns
.database_name
,
153 user
=ns
.database_username
, password
=ns
.database_password
,
156 with db
.transaction():
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
215 def handle_write(self
, ns
):
217 Compiles a database in libloc format out of what is in the database
220 writer
= location
.Writer(ns
.signing_key
, ns
.backup_signing_key
)
224 writer
.vendor
= ns
.vendor
227 writer
.description
= ns
.description
230 writer
.license
= ns
.license
232 # Add all Autonomous Systems
233 log
.info("Writing Autonomous Systems...")
235 # Select all ASes with a name
236 rows
= self
.db
.query("""
238 autnums.number AS number,
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
245 WHERE name <> %s ORDER BY number
249 a
= writer
.add_as(row
.number
)
253 log
.info("Writing networks...")
255 # Select all known networks
256 rows
= self
.db
.query("""
257 -- Return a list of those networks enriched with all
258 -- other information that we store in the database
260 DISTINCT ON (network)
267 SELECT country FROM network_overrides overrides
268 WHERE networks.network <<= overrides.network
269 ORDER BY masklen(overrides.network) DESC
273 SELECT country FROM autnum_overrides overrides
274 WHERE networks.autnum = overrides.number
282 SELECT is_anonymous_proxy FROM network_overrides overrides
283 WHERE networks.network <<= overrides.network
284 ORDER BY masklen(overrides.network) DESC
288 SELECT is_anonymous_proxy FROM autnum_overrides overrides
289 WHERE networks.autnum = overrides.number
292 ) AS is_anonymous_proxy,
295 SELECT is_satellite_provider FROM network_overrides overrides
296 WHERE networks.network <<= overrides.network
297 ORDER BY masklen(overrides.network) DESC
301 SELECT is_satellite_provider FROM autnum_overrides overrides
302 WHERE networks.autnum = overrides.number
305 ) AS is_satellite_provider,
308 SELECT is_anycast FROM network_overrides overrides
309 WHERE networks.network <<= overrides.network
310 ORDER BY masklen(overrides.network) DESC
314 SELECT is_anycast FROM autnum_overrides overrides
315 WHERE networks.autnum = overrides.number
321 SELECT is_drop FROM network_overrides overrides
322 WHERE networks.network <<= overrides.network
323 ORDER BY masklen(overrides.network) DESC
327 SELECT is_drop FROM autnum_overrides overrides
328 WHERE networks.autnum = overrides.number
334 known_networks.network AS network,
335 announcements.autnum AS autnum,
336 networks.country AS country,
338 -- Must be part of returned values for ORDER BY clause
339 masklen(announcements.network) AS sort_a,
340 masklen(networks.network) AS sort_b
342 SELECT network FROM announcements
344 SELECT network FROM networks
346 SELECT network FROM network_overrides
349 announcements ON known_networks.network <<= announcements.network
351 networks ON known_networks.network <<= networks.network
353 known_networks.network,
360 network
= writer
.add_network(row
.network
)
364 network
.country_code
= row
.country
368 network
.asn
= row
.autnum
371 if row
.is_anonymous_proxy
:
372 network
.set_flag(location
.NETWORK_FLAG_ANONYMOUS_PROXY
)
374 if row
.is_satellite_provider
:
375 network
.set_flag(location
.NETWORK_FLAG_SATELLITE_PROVIDER
)
378 network
.set_flag(location
.NETWORK_FLAG_ANYCAST
)
381 network
.set_flag(location
.NETWORK_FLAG_DROP
)
384 log
.info("Writing countries...")
385 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
388 c
= writer
.add_country(row
.country_code
)
389 c
.continent_code
= row
.continent_code
392 # Write everything to file
393 log
.info("Writing database to file...")
397 def handle_update_whois(self
, ns
):
398 downloader
= location
.importer
.Downloader()
400 # Download all sources
401 with self
.db
.transaction():
402 # Create some temporary tables to store parsed data
404 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
406 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
408 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
410 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
412 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
414 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
415 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
418 # Remove all previously imported content
420 TRUNCATE TABLE networks;
423 # Fetch all valid country codes to check parsed networks aganist...
424 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
428 validcountries
.append(row
.country_code
)
430 for source_key
in location
.importer
.WHOIS_SOURCES
:
431 for single_url
in location
.importer
.WHOIS_SOURCES
[source_key
]:
432 with downloader
.request(single_url
, return_blocks
=True) as f
:
434 self
._parse
_block
(block
, source_key
, validcountries
)
436 # Process all parsed networks from every RIR we happen to have access to,
437 # insert the largest network chunks into the networks table immediately...
438 families
= self
.db
.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
440 for family
in (row
.family
for row
in families
):
441 smallest
= self
.db
.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family
)
443 self
.db
.execute("INSERT INTO networks(network, country, original_countries, source) \
444 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest
.prefix
, family
)
446 # ... determine any other prefixes for this network family, ...
447 prefixes
= self
.db
.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
448 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family
)
450 # ... and insert networks with this prefix in case they provide additional
451 # information (i. e. subnet of a larger chunk with a different country)
452 for prefix
in (row
.prefix
for row
in prefixes
):
458 _rirdata.original_countries,
463 family(_rirdata.network) = %s
465 masklen(_rirdata.network) = %s
469 DISTINCT ON (c.network)
472 c.original_countries,
474 masklen(networks.network),
475 networks.country AS parent_country
481 c.network << networks.network
484 masklen(networks.network) DESC NULLS LAST
487 networks(network, country, original_countries, source)
496 parent_country IS NULL
498 country <> parent_country
499 ON CONFLICT DO NOTHING""",
504 INSERT INTO autnums(number, name, source)
505 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
506 JOIN _organizations ON _autnums.organization = _organizations.handle
507 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
510 # Download all extended sources
511 for source_key
in location
.importer
.EXTENDED_SOURCES
:
512 for single_url
in location
.importer
.EXTENDED_SOURCES
[source_key
]:
513 with self
.db
.transaction():
515 with downloader
.request(single_url
) as f
:
517 self
._parse
_line
(line
, source_key
, validcountries
)
519 # Download and import (technical) AS names from ARIN
520 self
._import
_as
_names
_from
_arin
()
522 def _check_parsed_network(self
, network
):
524 Assistive function to detect and subsequently sort out parsed
525 networks from RIR data (both Whois and so-called "extended sources"),
528 (a) not globally routable (RFC 1918 space, et al.)
529 (b) covering a too large chunk of the IP address space (prefix length
530 is < 7 for IPv4 networks, and < 10 for IPv6)
531 (c) "0.0.0.0" or "::" as a network address
532 (d) are too small for being publicly announced (we have decided not to
533 process them at the moment, as they significantly enlarge our
534 database without providing very helpful additional information)
536 This unfortunately is necessary due to brain-dead clutter across
537 various RIR databases, causing mismatches and eventually disruptions.
539 We will return False in case a network is not suitable for adding
540 it to our database, and True otherwise.
543 if not network
or not (isinstance(network
, ipaddress
.IPv4Network
) or isinstance(network
, ipaddress
.IPv6Network
)):
546 if not network
.is_global
:
547 log
.debug("Skipping non-globally routable network: %s" % network
)
550 if network
.version
== 4:
551 if network
.prefixlen
< 7:
552 log
.debug("Skipping too big IP chunk: %s" % network
)
555 if network
.prefixlen
> 24:
556 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
559 if str(network
.network_address
) == "0.0.0.0":
560 log
.debug("Skipping network based on 0.0.0.0: %s" % network
)
563 elif network
.version
== 6:
564 if network
.prefixlen
< 10:
565 log
.debug("Skipping too big IP chunk: %s" % network
)
568 if network
.prefixlen
> 48:
569 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
572 if str(network
.network_address
) == "::":
573 log
.debug("Skipping network based on '::': %s" % network
)
577 # This should not happen...
578 log
.warning("Skipping network of unknown family, this should not happen: %s" % network
)
581 # In case we have made it here, the network is considered to
582 # be suitable for libloc consumption...
585 def _check_parsed_asn(self
, asn
):
587 Assistive function to filter Autonomous System Numbers not being suitable
588 for adding to our database. Returns False in such cases, and True otherwise.
591 for start
, end
in VALID_ASN_RANGES
:
592 if start
<= asn
and end
>= asn
:
595 log
.info("Supplied ASN %s out of publicly routable ASN ranges" % asn
)
598 def _parse_block(self
, block
, source_key
, validcountries
= None):
599 # Get first line to find out what type of block this is
603 if line
.startswith("aut-num:"):
604 return self
._parse
_autnum
_block
(block
, source_key
)
607 if line
.startswith("inet6num:") or line
.startswith("inetnum:"):
608 return self
._parse
_inetnum
_block
(block
, source_key
, validcountries
)
611 elif line
.startswith("organisation:"):
612 return self
._parse
_org
_block
(block
, source_key
)
614 def _parse_autnum_block(self
, block
, source_key
):
618 key
, val
= split_line(line
)
621 m
= re
.match(r
"^(AS|as)(\d+)", val
)
623 autnum
["asn"] = m
.group(2)
626 autnum
[key
] = val
.upper()
629 # Save the first description line as well...
630 if not key
in autnum
:
634 if not autnum
or not "asn" in autnum
:
637 # Insert a dummy organisation handle into our temporary organisations
638 # table in case the AS does not have an organisation handle set, but
639 # has a description (a quirk often observed in APNIC area), so we can
640 # later display at least some string for this AS.
641 if not "org" in autnum
:
642 if "descr" in autnum
:
643 autnum
["org"] = "LIBLOC-%s-ORGHANDLE" % autnum
.get("asn")
645 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
646 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
647 autnum
.get("org"), autnum
.get("descr"), source_key
,
650 log
.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
654 # Insert into database
655 self
.db
.execute("INSERT INTO _autnums(number, organization, source) \
656 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
657 organization = excluded.organization",
658 autnum
.get("asn"), autnum
.get("org"), source_key
,
661 def _parse_inetnum_block(self
, block
, source_key
, validcountries
= None):
662 log
.debug("Parsing inetnum block:")
669 key
, val
= split_line(line
)
671 # Filter any inetnum records which are only referring to IP space
672 # not managed by that specific RIR...
674 if re
.match(r
"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val
.strip()):
675 log
.debug("Skipping record indicating historic/orphaned data: %s" % val
.strip())
679 start_address
, delim
, end_address
= val
.partition("-")
681 # Strip any excess space
682 start_address
, end_address
= start_address
.rstrip(), end_address
.strip()
684 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
685 if start_address
and not (delim
or end_address
):
687 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
689 start_address
= start_address
.split("/")
690 ldigits
= len(start_address
[0].split("."))
692 # How many octets do we need to add?
693 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
695 start_address
= start_address
[0] + ".0.0/" + start_address
[1]
697 start_address
= start_address
[0] + ".0/" + start_address
[1]
699 log
.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line
)
703 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
705 log
.warning("Could not parse line in LACNIC DB format: %s" % line
)
708 # Enumerate first and last IP address of this network
709 end_address
= start_address
[-1]
710 start_address
= start_address
[0]
713 # Convert to IP address
715 start_address
= ipaddress
.ip_address(start_address
)
716 end_address
= ipaddress
.ip_address(end_address
)
718 log
.warning("Could not parse line: %s" % line
)
721 inetnum
["inetnum"] = list(ipaddress
.summarize_address_range(start_address
, end_address
))
723 elif key
== "inet6num":
724 inetnum
[key
] = [ipaddress
.ip_network(val
, strict
=False)]
726 elif key
== "country":
729 # Catch RIR data objects with more than one country code...
730 if not key
in inetnum
:
733 if val
in inetnum
.get("country"):
734 # ... but keep this list distinct...
737 # When people set country codes to "UK", they actually mean "GB"
741 inetnum
[key
].append(val
)
744 if not inetnum
or not "country" in inetnum
:
747 # Prepare skipping objects with unknown country codes...
748 invalidcountries
= [singlecountry
for singlecountry
in inetnum
.get("country") if singlecountry
not in validcountries
]
750 # Iterate through all networks enumerated from above, check them for plausibility and insert
751 # them into the database, if _check_parsed_network() succeeded
752 for single_network
in inetnum
.get("inet6num") or inetnum
.get("inetnum"):
753 if self
._check
_parsed
_network
(single_network
):
755 # Skip objects with unknown country codes if they are valid to avoid log spam...
756 if validcountries
and invalidcountries
:
757 log
.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
758 (invalidcountries
, inetnum
.get("country"), inetnum
.get("inet6num") or inetnum
.get("inetnum")))
761 # Everything is fine here, run INSERT statement...
762 self
.db
.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
763 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
764 "%s" % single_network
, inetnum
.get("country")[0], inetnum
.get("country"), source_key
,
767 def _parse_org_block(self
, block
, source_key
):
771 key
, val
= split_line(line
)
773 if key
== "organisation":
774 org
[key
] = val
.upper()
775 elif key
== "org-name":
782 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
783 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
784 UPDATE SET name = excluded.name",
785 org
.get("organisation"), org
.get("org-name"), source_key
,
788 def _parse_line(self
, line
, source_key
, validcountries
= None):
790 if line
.startswith("2"):
794 if line
.startswith("#"):
798 registry
, country_code
, type, line
= line
.split("|", 3)
800 log
.warning("Could not parse line: %s" % line
)
803 # Skip any lines that are for stats only or do not have a country
804 # code at all (avoids log spam below)
805 if not country_code
or country_code
== '*':
808 # Skip objects with unknown country codes
809 if validcountries
and country_code
not in validcountries
:
810 log
.warning("Skipping line with bogus country '%s': %s" % \
811 (country_code
, line
))
814 if type in ("ipv6", "ipv4"):
815 return self
._parse
_ip
_line
(country_code
, type, line
, source_key
)
817 def _parse_ip_line(self
, country
, type, line
, source_key
):
819 address
, prefix
, date
, status
, organization
= line
.split("|")
823 # Try parsing the line without organization
825 address
, prefix
, date
, status
= line
.split("|")
827 log
.warning("Unhandled line format: %s" % line
)
830 # Skip anything that isn't properly assigned
831 if not status
in ("assigned", "allocated"):
834 # Cast prefix into an integer
838 log
.warning("Invalid prefix: %s" % prefix
)
841 # Fix prefix length for IPv4
843 prefix
= 32 - int(math
.log(prefix
, 2))
845 # Try to parse the address
847 network
= ipaddress
.ip_network("%s/%s" % (address
, prefix
), strict
=False)
849 log
.warning("Invalid IP address: %s" % address
)
852 if not self
._check
_parsed
_network
(network
):
855 self
.db
.execute("INSERT INTO networks(network, country, original_countries, source) \
856 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
857 UPDATE SET country = excluded.country",
858 "%s" % network
, country
, [country
], source_key
,
861 def _import_as_names_from_arin(self
):
862 downloader
= location
.importer
.Downloader()
864 # XXX: Download AS names file from ARIN (note that these names appear to be quite
865 # technical, not intended for human consumption, as description fields in
866 # organisation handles for other RIRs are - however, this is what we have got,
867 # and in some cases, it might be still better than nothing)
868 with downloader
.request("https://ftp.arin.net/info/asn.txt", return_blocks
=False) as f
:
870 # Convert binary line to string...
873 # ... valid lines start with a space, followed by the number of the Autonomous System ...
874 if not line
.startswith(" "):
877 # Split line and check if there is a valid ASN in it...
878 asn
, name
= line
.split()[0:2]
883 log
.debug("Skipping ARIN AS names line not containing an integer for ASN")
886 # Filter invalid ASNs...
887 if not self
._check
_parsed
_asn
(asn
):
890 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
891 if re
.match(r
"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name
):
894 # Bail out in case the AS name contains anything we do not expect here...
895 if re
.search(r
"[^a-zA-Z0-9-_]", name
):
896 log
.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
899 # Things look good here, run INSERT statement and skip this one if we already have
900 # a (better?) name for this Autonomous System...
906 ) VALUES (%s, %s, %s)
907 ON CONFLICT (number) DO NOTHING""",
913 def handle_update_announcements(self
, ns
):
914 server
= ns
.server
[0]
916 with self
.db
.transaction():
917 if server
.startswith("/"):
918 self
._handle
_update
_announcements
_from
_bird
(server
)
920 self
._handle
_update
_announcements
_from
_telnet
(server
)
922 # Purge anything we never want here
924 -- Delete default routes
925 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
927 -- Delete anything that is not global unicast address space
928 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
930 -- DELETE "current network" address space
931 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
933 -- DELETE local loopback address space
934 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
936 -- DELETE RFC 1918 address space
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
938 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
939 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
941 -- DELETE test, benchmark and documentation address space
942 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
946 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
948 -- DELETE CGNAT address space (RFC 6598)
949 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
951 -- DELETE link local address space
952 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
954 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
955 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
956 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
958 -- DELETE multicast and reserved address space
959 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
960 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
962 -- Delete networks that are too small to be in the global routing table
963 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
964 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
966 -- Delete any non-public or reserved ASNs
967 DELETE FROM announcements WHERE NOT (
968 (autnum >= 1 AND autnum <= 23455)
970 (autnum >= 23457 AND autnum <= 64495)
972 (autnum >= 131072 AND autnum <= 4199999999)
975 -- Delete everything that we have not seen for 14 days
976 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
979 def _handle_update_announcements_from_bird(self
, server
):
980 # Pre-compile the regular expression for faster searching
981 route
= re
.compile(b
"^\s(.+?)\s+.+?\[AS(.*?).\]$")
983 log
.info("Requesting routing table from Bird (%s)" % server
)
985 # Send command to list all routes
986 for line
in self
._bird
_cmd
(server
, "show route"):
987 m
= route
.match(line
)
989 log
.debug("Could not parse line: %s" % line
.decode())
992 # Fetch the extracted network and ASN
993 network
, autnum
= m
.groups()
995 # Insert it into the database
996 self
.db
.execute("INSERT INTO announcements(network, autnum) \
997 VALUES(%s, %s) ON CONFLICT (network) DO \
998 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
999 network
.decode(), autnum
.decode(),
1002 def _handle_update_announcements_from_telnet(self
, server
):
1003 # Pre-compile regular expression for routes
1004 route
= re
.compile(b
"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re
.MULTILINE|re
.DOTALL
)
1006 with telnetlib
.Telnet(server
) as t
:
1009 # t.set_debuglevel(10)
1011 # Wait for console greeting
1012 greeting
= t
.read_until(b
"> ", timeout
=30)
1014 log
.error("Could not get a console prompt")
1017 # Disable pagination
1018 t
.write(b
"terminal length 0\n")
1020 # Wait for the prompt to return
1023 # Fetch the routing tables
1024 for protocol
in ("ipv6", "ipv4"):
1025 log
.info("Requesting %s routing table" % protocol
)
1027 # Request the full unicast routing table
1028 t
.write(b
"show bgp %s unicast\n" % protocol
.encode())
1030 # Read entire header which ends with "Path"
1031 t
.read_until(b
"Path\r\n")
1034 # Try reading a full entry
1035 # Those might be broken across multiple lines but ends with i
1036 line
= t
.read_until(b
"i\r\n", timeout
=5)
1040 # Show line for debugging
1041 #log.debug(repr(line))
1043 # Try finding a route in here
1044 m
= route
.match(line
)
1046 network
, autnum
= m
.groups()
1048 # Convert network to string
1049 network
= network
.decode()
1051 # Append /24 for IPv4 addresses
1052 if not "/" in network
and not ":" in network
:
1053 network
= "%s/24" % network
1055 # Convert AS number to integer
1056 autnum
= int(autnum
)
1058 log
.info("Found announcement for %s by %s" % (network
, autnum
))
1060 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1061 VALUES(%s, %s) ON CONFLICT (network) DO \
1062 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1066 log
.info("Finished reading the %s routing table" % protocol
)
1068 def _bird_cmd(self
, socket_path
, command
):
1069 # Connect to the socket
1070 s
= socket
.socket(socket
.AF_UNIX
, socket
.SOCK_STREAM
)
1071 s
.connect(socket_path
)
1073 # Allocate some buffer
1077 s
.send(b
"%s\n" % command
.encode())
1080 # Fill up the buffer
1081 buffer += s
.recv(4096)
1084 # Search for the next newline
1085 pos
= buffer.find(b
"\n")
1087 # If we cannot find one, we go back and read more data
1091 # Cut after the newline character
1094 # Split the line we want and keep the rest in buffer
1095 line
, buffer = buffer[:pos
], buffer[pos
:]
1097 # Look for the end-of-output indicator
1098 if line
== b
"0000 \n":
1101 # Otherwise return the line
1104 def handle_update_overrides(self
, ns
):
1105 with self
.db
.transaction():
1106 # Drop all data that we have
1108 TRUNCATE TABLE autnum_overrides;
1109 TRUNCATE TABLE network_overrides;
1112 # Update overrides for various cloud providers big enough to publish their own IP
1113 # network allocation lists in a machine-readable format...
1114 self
._update
_overrides
_for
_aws
()
1116 # Update overrides for Spamhaus DROP feeds...
1117 self
._update
_overrides
_for
_spamhaus
_drop
()
1119 for file in ns
.files
:
1120 log
.info("Reading %s..." % file)
1122 with
open(file, "rb") as f
:
1123 for type, block
in location
.importer
.read_blocks(f
):
1125 network
= block
.get("net")
1126 # Try to parse and normalise the network
1128 network
= ipaddress
.ip_network(network
, strict
=False)
1129 except ValueError as e
:
1130 log
.warning("Invalid IP network: %s: %s" % (network
, e
))
1133 # Prevent that we overwrite all networks
1134 if network
.prefixlen
== 0:
1135 log
.warning("Skipping %s: You cannot overwrite default" % network
)
1139 INSERT INTO network_overrides(
1144 is_satellite_provider,
1147 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1148 ON CONFLICT (network) DO NOTHING""",
1150 block
.get("country"),
1152 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1153 self
._parse
_bool
(block
, "is-satellite-provider"),
1154 self
._parse
_bool
(block
, "is-anycast"),
1155 self
._parse
_bool
(block
, "drop"),
1158 elif type == "aut-num":
1159 autnum
= block
.get("aut-num")
1161 # Check if AS number begins with "AS"
1162 if not autnum
.startswith("AS"):
1163 log
.warning("Invalid AS number: %s" % autnum
)
1170 INSERT INTO autnum_overrides(
1176 is_satellite_provider,
1179 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1180 ON CONFLICT DO NOTHING""",
1183 block
.get("country"),
1185 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1186 self
._parse
_bool
(block
, "is-satellite-provider"),
1187 self
._parse
_bool
(block
, "is-anycast"),
1188 self
._parse
_bool
(block
, "drop"),
1192 log
.warning("Unsupported type: %s" % type)
1194 def _update_overrides_for_aws(self
):
1195 # Download Amazon AWS IP allocation file to create overrides...
1196 downloader
= location
.importer
.Downloader()
1199 with downloader
.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks
=False) as f
:
1200 aws_ip_dump
= json
.load(f
.body
)
1201 except Exception as e
:
1202 log
.error("unable to preprocess Amazon AWS IP ranges: %s" % e
)
1205 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1206 # there seems to be no machine-readable version available of this other than
1207 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1208 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1209 # was helpful here as well.
1210 aws_region_country_map
= {
1215 "ap-northeast-3": "JP",
1216 "ap-northeast-2": "KR",
1217 "ap-southeast-1": "SG",
1218 "ap-southeast-2": "AU",
1219 "ap-southeast-3": "MY",
1220 "ap-southeast-4": "AU",
1221 "ap-northeast-1": "JP",
1222 "ca-central-1": "CA",
1223 "eu-central-1": "DE",
1224 "eu-central-2": "CH",
1231 "me-central-1": "AE",
1236 # Fetch all valid country codes to check parsed networks aganist...
1237 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
1241 validcountries
.append(row
.country_code
)
1243 with self
.db
.transaction():
1244 for snetwork
in aws_ip_dump
["prefixes"] + aws_ip_dump
["ipv6_prefixes"]:
1246 network
= ipaddress
.ip_network(snetwork
.get("ip_prefix") or snetwork
.get("ipv6_prefix"), strict
=False)
1248 log
.warning("Unable to parse line: %s" % snetwork
)
1251 # Sanitize parsed networks...
1252 if not self
._check
_parsed
_network
(network
):
1255 # Determine region of this network...
1256 region
= snetwork
["region"]
1260 # Any region name starting with "us-" will get "US" country code assigned straight away...
1261 if region
.startswith("us-"):
1263 elif region
.startswith("cn-"):
1264 # ... same goes for China ...
1266 elif region
== "GLOBAL":
1267 # ... funny region name for anycast-like networks ...
1269 elif region
in aws_region_country_map
:
1270 # ... assign looked up country code otherwise ...
1271 cc
= aws_region_country_map
[region
]
1273 # ... and bail out if we are missing something here
1274 log
.warning("Unable to determine country code for line: %s" % snetwork
)
1277 # Skip networks with unknown country codes
1278 if not is_anycast
and validcountries
and cc
not in validcountries
:
1279 log
.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1283 # Conduct SQL statement...
1285 INSERT INTO network_overrides(
1290 is_satellite_provider,
1292 ) VALUES (%s, %s, %s, %s, %s, %s)
1293 ON CONFLICT (network) DO NOTHING""",
1296 "Amazon AWS IP feed",
1303 def _update_overrides_for_spamhaus_drop(self
):
1304 downloader
= location
.importer
.Downloader()
1307 "https://www.spamhaus.org/drop/drop.txt",
1308 "https://www.spamhaus.org/drop/edrop.txt",
1309 "https://www.spamhaus.org/drop/dropv6.txt"
1313 "https://www.spamhaus.org/drop/asndrop.txt"
1318 with downloader
.request(url
, return_blocks
=False) as f
:
1319 fcontent
= f
.body
.readlines()
1320 except Exception as e
:
1321 log
.error("Unable to download Spamhaus DROP URL %s: %s" % (url
, e
))
1324 # Iterate through every line, filter comments and add remaining networks to
1325 # the override table in case they are valid...
1326 with self
.db
.transaction():
1327 for sline
in fcontent
:
1329 # The response is assumed to be encoded in UTF-8...
1330 sline
= sline
.decode("utf-8")
1332 # Comments start with a semicolon...
1333 if sline
.startswith(";"):
1336 # Extract network and ignore anything afterwards...
1338 network
= ipaddress
.ip_network(sline
.split()[0], strict
=False)
1340 log
.error("Unable to parse line: %s" % sline
)
1343 # Sanitize parsed networks...
1344 if not self
._check
_parsed
_network
(network
):
1345 log
.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1349 # Conduct SQL statement...
1351 INSERT INTO network_overrides(
1355 ) VALUES (%s, %s, %s)
1356 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1358 "Spamhaus DROP lists",
1362 for url
in asn_urls
:
1364 with downloader
.request(url
, return_blocks
=False) as f
:
1365 fcontent
= f
.body
.readlines()
1366 except Exception as e
:
1367 log
.error("Unable to download Spamhaus DROP URL %s: %s" % (url
, e
))
1370 # Iterate through every line, filter comments and add remaining ASNs to
1371 # the override table in case they are valid...
1372 with self
.db
.transaction():
1373 for sline
in fcontent
:
1375 # The response is assumed to be encoded in UTF-8...
1376 sline
= sline
.decode("utf-8")
1378 # Comments start with a semicolon...
1379 if sline
.startswith(";"):
1382 # Throw away anything after the first space...
1383 sline
= sline
.split()[0]
1385 # ... strip the "AS" prefix from it ...
1386 sline
= sline
.strip("AS")
1388 # ... and convert it into an integer. Voila.
1391 # Filter invalid ASNs...
1392 if not self
._check
_parsed
_asn
(asn
):
1393 log
.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1397 # Conduct SQL statement...
1399 INSERT INTO autnum_overrides(
1403 ) VALUES (%s, %s, %s)
1404 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1406 "Spamhaus ASN-DROP list",
1411 def _parse_bool(block
, key
):
1412 val
= block
.get(key
)
1414 # There is no point to proceed when we got None
1418 # Convert to lowercase
1422 if val
in ("yes", "1"):
1426 if val
in ("no", "0"):
1432 def handle_import_countries(self
, ns
):
1433 with self
.db
.transaction():
1434 # Drop all data that we have
1435 self
.db
.execute("TRUNCATE TABLE countries")
1437 for file in ns
.file:
1439 line
= line
.rstrip()
1441 # Ignore any comments
1442 if line
.startswith("#"):
1446 country_code
, continent_code
, name
= line
.split(maxsplit
=2)
1448 log
.warning("Could not parse line: %s" % line
)
1451 self
.db
.execute("INSERT INTO countries(country_code, name, continent_code) \
1452 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code
, name
, continent_code
)
1455 def split_line(line
):
1456 key
, colon
, val
= line
.partition(":")
1458 # Strip any excess space
1465 # Run the command line interface