2 ###############################################################################
4 # libloc - A library to determine the location of someone on the Internet #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
18 ###############################################################################
30 # Load our location module
32 import location
.database
33 import location
.importer
34 from location
.i18n
import _
37 log
= logging
.getLogger("location.importer")
50 parser
= argparse
.ArgumentParser(
51 description
=_("Location Importer Command Line Interface"),
53 subparsers
= parser
.add_subparsers()
55 # Global configuration flags
56 parser
.add_argument("--debug", action
="store_true",
57 help=_("Enable debug output"))
58 parser
.add_argument("--quiet", action
="store_true",
59 help=_("Enable quiet mode"))
62 parser
.add_argument("--version", action
="version",
63 version
="%(prog)s @VERSION@")
66 parser
.add_argument("--database-host", required
=True,
67 help=_("Database Hostname"), metavar
=_("HOST"))
68 parser
.add_argument("--database-name", required
=True,
69 help=_("Database Name"), metavar
=_("NAME"))
70 parser
.add_argument("--database-username", required
=True,
71 help=_("Database Username"), metavar
=_("USERNAME"))
72 parser
.add_argument("--database-password", required
=True,
73 help=_("Database Password"), metavar
=_("PASSWORD"))
76 write
= subparsers
.add_parser("write", help=_("Write database to file"))
77 write
.set_defaults(func
=self
.handle_write
)
78 write
.add_argument("file", nargs
=1, help=_("Database File"))
79 write
.add_argument("--signing-key", nargs
="?", type=open, help=_("Signing Key"))
80 write
.add_argument("--backup-signing-key", nargs
="?", type=open, help=_("Backup Signing Key"))
81 write
.add_argument("--vendor", nargs
="?", help=_("Sets the vendor"))
82 write
.add_argument("--description", nargs
="?", help=_("Sets a description"))
83 write
.add_argument("--license", nargs
="?", help=_("Sets the license"))
84 write
.add_argument("--version", type=int, help=_("Database Format Version"))
87 update_whois
= subparsers
.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois
.set_defaults(func
=self
.handle_update_whois
)
90 # Update announcements
91 update_announcements
= subparsers
.add_parser("update-announcements",
92 help=_("Update BGP Annoucements"))
93 update_announcements
.set_defaults(func
=self
.handle_update_announcements
)
94 update_announcements
.add_argument("server", nargs
=1,
95 help=_("Route Server to connect to"), metavar
=_("SERVER"))
98 update_overrides
= subparsers
.add_parser("update-overrides",
99 help=_("Update overrides"),
101 update_overrides
.add_argument(
102 "files", nargs
="+", help=_("Files to import"),
104 update_overrides
.set_defaults(func
=self
.handle_update_overrides
)
107 import_countries
= subparsers
.add_parser("import-countries",
108 help=_("Import countries"),
110 import_countries
.add_argument("file", nargs
=1, type=argparse
.FileType("r"),
111 help=_("File to import"))
112 import_countries
.set_defaults(func
=self
.handle_import_countries
)
114 args
= parser
.parse_args()
118 location
.logger
.set_level(logging
.DEBUG
)
120 location
.logger
.set_level(logging
.WARNING
)
122 # Print usage if no action was given
123 if not "func" in args
:
130 # Parse command line arguments
131 args
= self
.parse_cli()
133 # Initialise database
134 self
.db
= self
._setup
_database
(args
)
137 ret
= args
.func(args
)
139 # Return with exit code
143 # Otherwise just exit
146 def _setup_database(self
, ns
):
148 Initialise the database
150 # Connect to database
151 db
= location
.database
.Connection(
152 host
=ns
.database_host
, database
=ns
.database_name
,
153 user
=ns
.database_username
, password
=ns
.database_password
,
156 with db
.transaction():
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
215 def handle_write(self
, ns
):
217 Compiles a database in libloc format out of what is in the database
220 writer
= location
.Writer(ns
.signing_key
, ns
.backup_signing_key
)
224 writer
.vendor
= ns
.vendor
227 writer
.description
= ns
.description
230 writer
.license
= ns
.license
232 # Add all Autonomous Systems
233 log
.info("Writing Autonomous Systems...")
235 # Select all ASes with a name
236 rows
= self
.db
.query("""
238 autnums.number AS number,
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
245 WHERE name <> %s ORDER BY number
249 a
= writer
.add_as(row
.number
)
253 log
.info("Writing networks...")
255 # Select all known networks
256 rows
= self
.db
.query("""
257 WITH known_networks AS (
258 SELECT network FROM announcements
260 SELECT network FROM networks
262 SELECT network FROM network_overrides
265 ordered_networks AS (
267 known_networks.network AS network,
268 announcements.autnum AS autnum,
269 networks.country AS country,
271 -- Must be part of returned values for ORDER BY clause
272 masklen(announcements.network) AS sort_a,
273 masklen(networks.network) AS sort_b
277 announcements ON known_networks.network <<= announcements.network
279 networks ON known_networks.network <<= networks.network
281 known_networks.network,
286 -- Return a list of those networks enriched with all
287 -- other information that we store in the database
289 DISTINCT ON (network)
296 SELECT country FROM network_overrides overrides
297 WHERE networks.network <<= overrides.network
298 ORDER BY masklen(overrides.network) DESC
302 SELECT country FROM autnum_overrides overrides
303 WHERE networks.autnum = overrides.number
311 SELECT is_anonymous_proxy FROM network_overrides overrides
312 WHERE networks.network <<= overrides.network
313 ORDER BY masklen(overrides.network) DESC
317 SELECT is_anonymous_proxy FROM autnum_overrides overrides
318 WHERE networks.autnum = overrides.number
321 ) AS is_anonymous_proxy,
324 SELECT is_satellite_provider FROM network_overrides overrides
325 WHERE networks.network <<= overrides.network
326 ORDER BY masklen(overrides.network) DESC
330 SELECT is_satellite_provider FROM autnum_overrides overrides
331 WHERE networks.autnum = overrides.number
334 ) AS is_satellite_provider,
337 SELECT is_anycast FROM network_overrides overrides
338 WHERE networks.network <<= overrides.network
339 ORDER BY masklen(overrides.network) DESC
343 SELECT is_anycast FROM autnum_overrides overrides
344 WHERE networks.autnum = overrides.number
350 SELECT is_drop FROM network_overrides overrides
351 WHERE networks.network <<= overrides.network
352 ORDER BY masklen(overrides.network) DESC
356 SELECT is_drop FROM autnum_overrides overrides
357 WHERE networks.autnum = overrides.number
362 ordered_networks networks
366 network
= writer
.add_network(row
.network
)
370 network
.country_code
= row
.country
374 network
.asn
= row
.autnum
377 if row
.is_anonymous_proxy
:
378 network
.set_flag(location
.NETWORK_FLAG_ANONYMOUS_PROXY
)
380 if row
.is_satellite_provider
:
381 network
.set_flag(location
.NETWORK_FLAG_SATELLITE_PROVIDER
)
384 network
.set_flag(location
.NETWORK_FLAG_ANYCAST
)
387 network
.set_flag(location
.NETWORK_FLAG_DROP
)
390 log
.info("Writing countries...")
391 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
394 c
= writer
.add_country(row
.country_code
)
395 c
.continent_code
= row
.continent_code
398 # Write everything to file
399 log
.info("Writing database to file...")
403 def handle_update_whois(self
, ns
):
404 downloader
= location
.importer
.Downloader()
406 # Download all sources
407 with self
.db
.transaction():
408 # Create some temporary tables to store parsed data
410 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
412 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
414 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
416 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
418 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
420 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
421 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
424 # Remove all previously imported content
426 TRUNCATE TABLE networks;
429 # Fetch all valid country codes to check parsed networks aganist...
430 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
434 validcountries
.append(row
.country_code
)
436 for source_key
in location
.importer
.WHOIS_SOURCES
:
437 for single_url
in location
.importer
.WHOIS_SOURCES
[source_key
]:
438 for block
in downloader
.request_blocks(single_url
):
439 self
._parse
_block
(block
, source_key
, validcountries
)
441 # Process all parsed networks from every RIR we happen to have access to,
442 # insert the largest network chunks into the networks table immediately...
443 families
= self
.db
.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
445 for family
in (row
.family
for row
in families
):
446 smallest
= self
.db
.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family
)
448 self
.db
.execute("INSERT INTO networks(network, country, original_countries, source) \
449 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest
.prefix
, family
)
451 # ... determine any other prefixes for this network family, ...
452 prefixes
= self
.db
.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
453 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family
)
455 # ... and insert networks with this prefix in case they provide additional
456 # information (i. e. subnet of a larger chunk with a different country)
457 for prefix
in (row
.prefix
for row
in prefixes
):
463 _rirdata.original_countries,
468 family(_rirdata.network) = %s
470 masklen(_rirdata.network) = %s
474 DISTINCT ON (c.network)
477 c.original_countries,
479 masklen(networks.network),
480 networks.country AS parent_country
486 c.network << networks.network
489 masklen(networks.network) DESC NULLS LAST
492 networks(network, country, original_countries, source)
501 parent_country IS NULL
503 country <> parent_country
504 ON CONFLICT DO NOTHING""",
509 INSERT INTO autnums(number, name, source)
510 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
511 JOIN _organizations ON _autnums.organization = _organizations.handle
512 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
515 # Download all extended sources
516 for source_key
in location
.importer
.EXTENDED_SOURCES
:
517 for single_url
in location
.importer
.EXTENDED_SOURCES
[source_key
]:
519 for line
in downloader
.request_lines(single_url
):
520 self
._parse
_line
(line
, source_key
, validcountries
)
522 # Download and import (technical) AS names from ARIN
523 self
._import
_as
_names
_from
_arin
()
525 def _check_parsed_network(self
, network
):
527 Assistive function to detect and subsequently sort out parsed
528 networks from RIR data (both Whois and so-called "extended sources"),
531 (a) not globally routable (RFC 1918 space, et al.)
532 (b) covering a too large chunk of the IP address space (prefix length
533 is < 7 for IPv4 networks, and < 10 for IPv6)
534 (c) "0.0.0.0" or "::" as a network address
535 (d) are too small for being publicly announced (we have decided not to
536 process them at the moment, as they significantly enlarge our
537 database without providing very helpful additional information)
539 This unfortunately is necessary due to brain-dead clutter across
540 various RIR databases, causing mismatches and eventually disruptions.
542 We will return False in case a network is not suitable for adding
543 it to our database, and True otherwise.
546 if not network
or not (isinstance(network
, ipaddress
.IPv4Network
) or isinstance(network
, ipaddress
.IPv6Network
)):
549 if not network
.is_global
:
550 log
.debug("Skipping non-globally routable network: %s" % network
)
553 if network
.version
== 4:
554 if network
.prefixlen
< 7:
555 log
.debug("Skipping too big IP chunk: %s" % network
)
558 if network
.prefixlen
> 24:
559 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
562 if str(network
.network_address
) == "0.0.0.0":
563 log
.debug("Skipping network based on 0.0.0.0: %s" % network
)
566 elif network
.version
== 6:
567 if network
.prefixlen
< 10:
568 log
.debug("Skipping too big IP chunk: %s" % network
)
571 if network
.prefixlen
> 48:
572 log
.debug("Skipping network too small to be publicly announced: %s" % network
)
575 if str(network
.network_address
) == "::":
576 log
.debug("Skipping network based on '::': %s" % network
)
580 # This should not happen...
581 log
.warning("Skipping network of unknown family, this should not happen: %s" % network
)
584 # In case we have made it here, the network is considered to
585 # be suitable for libloc consumption...
588 def _check_parsed_asn(self
, asn
):
590 Assistive function to filter Autonomous System Numbers not being suitable
591 for adding to our database. Returns False in such cases, and True otherwise.
594 for start
, end
in VALID_ASN_RANGES
:
595 if start
<= asn
and end
>= asn
:
598 log
.info("Supplied ASN %s out of publicly routable ASN ranges" % asn
)
601 def _parse_block(self
, block
, source_key
, validcountries
= None):
602 # Get first line to find out what type of block this is
606 if line
.startswith("aut-num:"):
607 return self
._parse
_autnum
_block
(block
, source_key
)
610 if line
.startswith("inet6num:") or line
.startswith("inetnum:"):
611 return self
._parse
_inetnum
_block
(block
, source_key
, validcountries
)
614 elif line
.startswith("organisation:"):
615 return self
._parse
_org
_block
(block
, source_key
)
617 def _parse_autnum_block(self
, block
, source_key
):
621 key
, val
= split_line(line
)
624 m
= re
.match(r
"^(AS|as)(\d+)", val
)
626 autnum
["asn"] = m
.group(2)
629 autnum
[key
] = val
.upper()
632 # Save the first description line as well...
633 if not key
in autnum
:
637 if not autnum
or not "asn" in autnum
:
640 # Insert a dummy organisation handle into our temporary organisations
641 # table in case the AS does not have an organisation handle set, but
642 # has a description (a quirk often observed in APNIC area), so we can
643 # later display at least some string for this AS.
644 if not "org" in autnum
:
645 if "descr" in autnum
:
646 autnum
["org"] = "LIBLOC-%s-ORGHANDLE" % autnum
.get("asn")
648 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
649 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
650 autnum
.get("org"), autnum
.get("descr"), source_key
,
653 log
.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
657 # Insert into database
658 self
.db
.execute("INSERT INTO _autnums(number, organization, source) \
659 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
660 organization = excluded.organization",
661 autnum
.get("asn"), autnum
.get("org"), source_key
,
664 def _parse_inetnum_block(self
, block
, source_key
, validcountries
= None):
665 log
.debug("Parsing inetnum block:")
672 key
, val
= split_line(line
)
674 # Filter any inetnum records which are only referring to IP space
675 # not managed by that specific RIR...
677 if re
.match(r
"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val
.strip()):
678 log
.debug("Skipping record indicating historic/orphaned data: %s" % val
.strip())
682 start_address
, delim
, end_address
= val
.partition("-")
684 # Strip any excess space
685 start_address
, end_address
= start_address
.rstrip(), end_address
.strip()
687 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
688 if start_address
and not (delim
or end_address
):
690 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
692 start_address
= start_address
.split("/")
693 ldigits
= start_address
[0].count(".")
695 # How many octets do we need to add?
696 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
698 start_address
= start_address
[0] + ".0.0/" + start_address
[1]
700 start_address
= start_address
[0] + ".0/" + start_address
[1]
702 log
.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line
)
706 start_address
= ipaddress
.ip_network(start_address
, strict
=False)
708 log
.warning("Could not parse line in LACNIC DB format: %s" % line
)
711 # Enumerate first and last IP address of this network
712 end_address
= start_address
[-1]
713 start_address
= start_address
[0]
716 # Convert to IP address
718 start_address
= ipaddress
.ip_address(start_address
)
719 end_address
= ipaddress
.ip_address(end_address
)
721 log
.warning("Could not parse line: %s" % line
)
724 inetnum
["inetnum"] = list(ipaddress
.summarize_address_range(start_address
, end_address
))
726 elif key
== "inet6num":
727 inetnum
[key
] = [ipaddress
.ip_network(val
, strict
=False)]
729 elif key
== "country":
732 # Catch RIR data objects with more than one country code...
733 if not key
in inetnum
:
736 if val
in inetnum
.get("country"):
737 # ... but keep this list distinct...
740 # When people set country codes to "UK", they actually mean "GB"
744 inetnum
[key
].append(val
)
747 if not inetnum
or not "country" in inetnum
:
750 # Prepare skipping objects with unknown country codes...
751 invalidcountries
= [singlecountry
for singlecountry
in inetnum
.get("country") if singlecountry
not in validcountries
]
753 # Iterate through all networks enumerated from above, check them for plausibility and insert
754 # them into the database, if _check_parsed_network() succeeded
755 for single_network
in inetnum
.get("inet6num") or inetnum
.get("inetnum"):
756 if self
._check
_parsed
_network
(single_network
):
758 # Skip objects with unknown country codes if they are valid to avoid log spam...
759 if validcountries
and invalidcountries
:
760 log
.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
761 (invalidcountries
, inetnum
.get("country"), inetnum
.get("inet6num") or inetnum
.get("inetnum")))
764 # Everything is fine here, run INSERT statement...
765 self
.db
.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
766 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
767 "%s" % single_network
, inetnum
.get("country")[0], inetnum
.get("country"), source_key
,
770 def _parse_org_block(self
, block
, source_key
):
774 key
, val
= split_line(line
)
776 if key
== "organisation":
777 org
[key
] = val
.upper()
778 elif key
== "org-name":
785 self
.db
.execute("INSERT INTO _organizations(handle, name, source) \
786 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
787 UPDATE SET name = excluded.name",
788 org
.get("organisation"), org
.get("org-name"), source_key
,
791 def _parse_line(self
, line
, source_key
, validcountries
= None):
793 if line
.startswith("2"):
797 if line
.startswith("#"):
801 registry
, country_code
, type, line
= line
.split("|", 3)
803 log
.warning("Could not parse line: %s" % line
)
806 # Skip any lines that are for stats only or do not have a country
807 # code at all (avoids log spam below)
808 if not country_code
or country_code
== '*':
811 # Skip objects with unknown country codes
812 if validcountries
and country_code
not in validcountries
:
813 log
.warning("Skipping line with bogus country '%s': %s" % \
814 (country_code
, line
))
817 if type in ("ipv6", "ipv4"):
818 return self
._parse
_ip
_line
(country_code
, type, line
, source_key
)
820 def _parse_ip_line(self
, country
, type, line
, source_key
):
822 address
, prefix
, date
, status
, organization
= line
.split("|")
826 # Try parsing the line without organization
828 address
, prefix
, date
, status
= line
.split("|")
830 log
.warning("Unhandled line format: %s" % line
)
833 # Skip anything that isn't properly assigned
834 if not status
in ("assigned", "allocated"):
837 # Cast prefix into an integer
841 log
.warning("Invalid prefix: %s" % prefix
)
844 # Fix prefix length for IPv4
846 prefix
= 32 - int(math
.log(prefix
, 2))
848 # Try to parse the address
850 network
= ipaddress
.ip_network("%s/%s" % (address
, prefix
), strict
=False)
852 log
.warning("Invalid IP address: %s" % address
)
855 if not self
._check
_parsed
_network
(network
):
858 self
.db
.execute("INSERT INTO networks(network, country, original_countries, source) \
859 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
860 UPDATE SET country = excluded.country",
861 "%s" % network
, country
, [country
], source_key
,
864 def _import_as_names_from_arin(self
):
865 downloader
= location
.importer
.Downloader()
867 # XXX: Download AS names file from ARIN (note that these names appear to be quite
868 # technical, not intended for human consumption, as description fields in
869 # organisation handles for other RIRs are - however, this is what we have got,
870 # and in some cases, it might be still better than nothing)
871 for line
in downloader
.request_lines("https://ftp.arin.net/info/asn.txt"):
872 # Valid lines start with a space, followed by the number of the Autonomous System ...
873 if not line
.startswith(" "):
876 # Split line and check if there is a valid ASN in it...
877 asn
, name
= line
.split()[0:2]
882 log
.debug("Skipping ARIN AS names line not containing an integer for ASN")
885 # Filter invalid ASNs...
886 if not self
._check
_parsed
_asn
(asn
):
889 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
890 if re
.match(r
"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name
):
893 # Bail out in case the AS name contains anything we do not expect here...
894 if re
.search(r
"[^a-zA-Z0-9-_]", name
):
895 log
.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
898 # Things look good here, run INSERT statement and skip this one if we already have
899 # a (better?) name for this Autonomous System...
905 ) VALUES (%s, %s, %s)
906 ON CONFLICT (number) DO NOTHING""",
912 def handle_update_announcements(self
, ns
):
913 server
= ns
.server
[0]
915 with self
.db
.transaction():
916 if server
.startswith("/"):
917 self
._handle
_update
_announcements
_from
_bird
(server
)
919 self
._handle
_update
_announcements
_from
_telnet
(server
)
921 # Purge anything we never want here
923 -- Delete default routes
924 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
926 -- Delete anything that is not global unicast address space
927 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
929 -- DELETE "current network" address space
930 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
932 -- DELETE local loopback address space
933 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
935 -- DELETE RFC 1918 address space
936 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
938 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
940 -- DELETE test, benchmark and documentation address space
941 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
942 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
947 -- DELETE CGNAT address space (RFC 6598)
948 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
950 -- DELETE link local address space
951 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
953 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
954 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
955 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
957 -- DELETE multicast and reserved address space
958 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
959 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
961 -- Delete networks that are too small to be in the global routing table
962 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
963 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
965 -- Delete any non-public or reserved ASNs
966 DELETE FROM announcements WHERE NOT (
967 (autnum >= 1 AND autnum <= 23455)
969 (autnum >= 23457 AND autnum <= 64495)
971 (autnum >= 131072 AND autnum <= 4199999999)
974 -- Delete everything that we have not seen for 14 days
975 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
978 def _handle_update_announcements_from_bird(self
, server
):
979 # Pre-compile the regular expression for faster searching
980 route
= re
.compile(b
"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
982 log
.info("Requesting routing table from Bird (%s)" % server
)
984 aggregated_networks
= []
986 # Send command to list all routes
987 for line
in self
._bird
_cmd
(server
, "show route"):
988 m
= route
.match(line
)
994 # Ignore any header lines with the name of the routing table
995 elif line
.startswith(b
"Table"):
1000 log
.debug("Could not parse line: %s" % line
.decode())
1004 # Fetch the extracted network and ASN
1005 network
, autnum
= m
.groups()
1007 # Decode into strings
1009 network
= network
.decode()
1011 autnum
= autnum
.decode()
1013 # Collect all aggregated networks
1015 log
.debug("%s is an aggregated network" % network
)
1016 aggregated_networks
.append(network
)
1019 # Insert it into the database
1020 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1021 VALUES(%s, %s) ON CONFLICT (network) DO \
1022 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1026 # Process any aggregated networks
1027 for network
in aggregated_networks
:
1028 log
.debug("Processing aggregated network %s" % network
)
1030 # Run "show route all" for each network
1031 for line
in self
._bird
_cmd
(server
, "show route %s all" % network
):
1032 # Try finding the path
1033 m
= re
.match(b
"\s+BGP\.as_path:.* (\d+) {\d+}$", line
)
1035 # Select the last AS number in the path
1036 autnum
= m
.group(1).decode()
1038 # Insert it into the database
1039 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1040 VALUES(%s, %s) ON CONFLICT (network) DO \
1041 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1045 # We don't need to process any more
1048 def _handle_update_announcements_from_telnet(self
, server
):
1049 # Pre-compile regular expression for routes
1050 route
= re
.compile(b
"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re
.MULTILINE|re
.DOTALL
)
1052 with telnetlib
.Telnet(server
) as t
:
1055 # t.set_debuglevel(10)
1057 # Wait for console greeting
1058 greeting
= t
.read_until(b
"> ", timeout
=30)
1060 log
.error("Could not get a console prompt")
1063 # Disable pagination
1064 t
.write(b
"terminal length 0\n")
1066 # Wait for the prompt to return
1069 # Fetch the routing tables
1070 for protocol
in ("ipv6", "ipv4"):
1071 log
.info("Requesting %s routing table" % protocol
)
1073 # Request the full unicast routing table
1074 t
.write(b
"show bgp %s unicast\n" % protocol
.encode())
1076 # Read entire header which ends with "Path"
1077 t
.read_until(b
"Path\r\n")
1080 # Try reading a full entry
1081 # Those might be broken across multiple lines but ends with i
1082 line
= t
.read_until(b
"i\r\n", timeout
=5)
1086 # Show line for debugging
1087 #log.debug(repr(line))
1089 # Try finding a route in here
1090 m
= route
.match(line
)
1092 network
, autnum
= m
.groups()
1094 # Convert network to string
1095 network
= network
.decode()
1097 # Append /24 for IPv4 addresses
1098 if not "/" in network
and not ":" in network
:
1099 network
= "%s/24" % network
1101 # Convert AS number to integer
1102 autnum
= int(autnum
)
1104 log
.info("Found announcement for %s by %s" % (network
, autnum
))
1106 self
.db
.execute("INSERT INTO announcements(network, autnum) \
1107 VALUES(%s, %s) ON CONFLICT (network) DO \
1108 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1112 log
.info("Finished reading the %s routing table" % protocol
)
1114 def _bird_cmd(self
, socket_path
, command
):
1115 # Connect to the socket
1116 s
= socket
.socket(socket
.AF_UNIX
, socket
.SOCK_STREAM
)
1117 s
.connect(socket_path
)
1119 # Allocate some buffer
1122 log
.debug("Sending Bird command: %s" % command
)
1125 s
.send(b
"%s\n" % command
.encode())
1128 # Fill up the buffer
1129 buffer += s
.recv(4096)
1132 # Search for the next newline
1133 pos
= buffer.find(b
"\n")
1135 # If we cannot find one, we go back and read more data
1139 # Cut after the newline character
1142 # Split the line we want and keep the rest in buffer
1143 line
, buffer = buffer[:pos
], buffer[pos
:]
1145 # Try parsing any status lines
1146 if len(line
) > 4 and line
[:4].isdigit() and line
[4] in (32, 45):
1147 code
, delim
, line
= int(line
[:4]), line
[4], line
[5:]
1149 log
.debug("Received response code %s from bird" % code
)
1159 # Otherwise return the line
1162 def handle_update_overrides(self
, ns
):
1163 with self
.db
.transaction():
1164 # Only drop manually created overrides, as we can be reasonably sure to have them,
1165 # and preserve the rest. If appropriate, it is deleted by correspondent functions.
1167 DELETE FROM autnum_overrides WHERE source = 'manual';
1168 DELETE FROM network_overrides WHERE source = 'manual';
1171 # Update overrides for various cloud providers big enough to publish their own IP
1172 # network allocation lists in a machine-readable format...
1173 self
._update
_overrides
_for
_aws
()
1175 # Update overrides for Spamhaus DROP feeds...
1176 self
._update
_overrides
_for
_spamhaus
_drop
()
1178 for file in ns
.files
:
1179 log
.info("Reading %s..." % file)
1181 with
open(file, "rb") as f
:
1182 for type, block
in location
.importer
.read_blocks(f
):
1184 network
= block
.get("net")
1185 # Try to parse and normalise the network
1187 network
= ipaddress
.ip_network(network
, strict
=False)
1188 except ValueError as e
:
1189 log
.warning("Invalid IP network: %s: %s" % (network
, e
))
1192 # Prevent that we overwrite all networks
1193 if network
.prefixlen
== 0:
1194 log
.warning("Skipping %s: You cannot overwrite default" % network
)
1198 INSERT INTO network_overrides(
1203 is_satellite_provider,
1206 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1207 ON CONFLICT (network) DO NOTHING""",
1209 block
.get("country"),
1211 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1212 self
._parse
_bool
(block
, "is-satellite-provider"),
1213 self
._parse
_bool
(block
, "is-anycast"),
1214 self
._parse
_bool
(block
, "drop"),
1217 elif type == "aut-num":
1218 autnum
= block
.get("aut-num")
1220 # Check if AS number begins with "AS"
1221 if not autnum
.startswith("AS"):
1222 log
.warning("Invalid AS number: %s" % autnum
)
1229 INSERT INTO autnum_overrides(
1235 is_satellite_provider,
1238 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1239 ON CONFLICT DO NOTHING""",
1242 block
.get("country"),
1244 self
._parse
_bool
(block
, "is-anonymous-proxy"),
1245 self
._parse
_bool
(block
, "is-satellite-provider"),
1246 self
._parse
_bool
(block
, "is-anycast"),
1247 self
._parse
_bool
(block
, "drop"),
1251 log
.warning("Unsupported type: %s" % type)
1253 def _update_overrides_for_aws(self
):
1254 # Download Amazon AWS IP allocation file to create overrides...
1255 downloader
= location
.importer
.Downloader()
1259 f
= downloader
.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
1261 # Parse downloaded file
1262 aws_ip_dump
= json
.load(f
)
1263 except Exception as e
:
1264 log
.error("unable to preprocess Amazon AWS IP ranges: %s" % e
)
1267 # At this point, we can assume the downloaded file to be valid
1269 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1272 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1273 # there seems to be no machine-readable version available of this other than
1274 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1275 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1276 # was helpful here as well.
1277 aws_region_country_map
= {
1282 "ap-northeast-3": "JP",
1283 "ap-northeast-2": "KR",
1284 "ap-southeast-1": "SG",
1285 "ap-southeast-2": "AU",
1286 "ap-southeast-3": "MY",
1287 "ap-southeast-4": "AU",
1288 "ap-northeast-1": "JP",
1289 "ca-central-1": "CA",
1290 "eu-central-1": "DE",
1291 "eu-central-2": "CH",
1298 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
1299 "me-central-1": "AE",
1304 # Fetch all valid country codes to check parsed networks aganist...
1305 rows
= self
.db
.query("SELECT * FROM countries ORDER BY country_code")
1309 validcountries
.append(row
.country_code
)
1311 with self
.db
.transaction():
1312 for snetwork
in aws_ip_dump
["prefixes"] + aws_ip_dump
["ipv6_prefixes"]:
1314 network
= ipaddress
.ip_network(snetwork
.get("ip_prefix") or snetwork
.get("ipv6_prefix"), strict
=False)
1316 log
.warning("Unable to parse line: %s" % snetwork
)
1319 # Sanitize parsed networks...
1320 if not self
._check
_parsed
_network
(network
):
1323 # Determine region of this network...
1324 region
= snetwork
["region"]
1328 # Any region name starting with "us-" will get "US" country code assigned straight away...
1329 if region
.startswith("us-"):
1331 elif region
.startswith("cn-"):
1332 # ... same goes for China ...
1334 elif region
== "GLOBAL":
1335 # ... funny region name for anycast-like networks ...
1337 elif region
in aws_region_country_map
:
1338 # ... assign looked up country code otherwise ...
1339 cc
= aws_region_country_map
[region
]
1341 # ... and bail out if we are missing something here
1342 log
.warning("Unable to determine country code for line: %s" % snetwork
)
1345 # Skip networks with unknown country codes
1346 if not is_anycast
and validcountries
and cc
not in validcountries
:
1347 log
.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1351 # Conduct SQL statement...
1353 INSERT INTO network_overrides(
1358 is_satellite_provider,
1360 ) VALUES (%s, %s, %s, %s, %s, %s)
1361 ON CONFLICT (network) DO NOTHING""",
1364 "Amazon AWS IP feed",
1371 def _update_overrides_for_spamhaus_drop(self
):
1372 downloader
= location
.importer
.Downloader()
1375 "https://www.spamhaus.org/drop/drop.txt",
1376 "https://www.spamhaus.org/drop/edrop.txt",
1377 "https://www.spamhaus.org/drop/dropv6.txt"
1381 "https://www.spamhaus.org/drop/asndrop.txt"
1386 f
= downloader
.retrieve(url
)
1389 fcontent
= f
.readlines()
1391 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1393 if len(fcontent
) > 10:
1395 DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
1396 DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
1399 log
.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url
)
1402 # Iterate through every line, filter comments and add remaining networks to
1403 # the override table in case they are valid...
1404 with self
.db
.transaction():
1405 for sline
in fcontent
:
1406 # The response is assumed to be encoded in UTF-8...
1407 sline
= sline
.decode("utf-8")
1409 # Comments start with a semicolon...
1410 if sline
.startswith(";"):
1413 # Extract network and ignore anything afterwards...
1415 network
= ipaddress
.ip_network(sline
.split()[0], strict
=False)
1417 log
.error("Unable to parse line: %s" % sline
)
1420 # Sanitize parsed networks...
1421 if not self
._check
_parsed
_network
(network
):
1422 log
.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1426 # Conduct SQL statement...
1428 INSERT INTO network_overrides(
1432 ) VALUES (%s, %s, %s)
1433 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1435 "Spamhaus DROP lists",
1439 for url
in asn_urls
:
1441 f
= downloader
.retrieve(url
)
1443 # Iterate through every line, filter comments and add remaining ASNs to
1444 # the override table in case they are valid...
1445 with self
.db
.transaction():
1446 for sline
in t
.readlines():
1447 # The response is assumed to be encoded in UTF-8...
1448 sline
= sline
.decode("utf-8")
1450 # Comments start with a semicolon...
1451 if sline
.startswith(";"):
1454 # Throw away anything after the first space...
1455 sline
= sline
.split()[0]
1457 # ... strip the "AS" prefix from it ...
1458 sline
= sline
.strip("AS")
1460 # ... and convert it into an integer. Voila.
1463 # Filter invalid ASNs...
1464 if not self
._check
_parsed
_asn
(asn
):
1465 log
.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1469 # Conduct SQL statement...
1471 INSERT INTO autnum_overrides(
1475 ) VALUES (%s, %s, %s)
1476 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1478 "Spamhaus ASN-DROP list",
1483 def _parse_bool(block
, key
):
1484 val
= block
.get(key
)
1486 # There is no point to proceed when we got None
1490 # Convert to lowercase
1494 if val
in ("yes", "1"):
1498 if val
in ("no", "0"):
1504 def handle_import_countries(self
, ns
):
1505 with self
.db
.transaction():
1506 # Drop all data that we have
1507 self
.db
.execute("TRUNCATE TABLE countries")
1509 for file in ns
.file:
1511 line
= line
.rstrip()
1513 # Ignore any comments
1514 if line
.startswith("#"):
1518 country_code
, continent_code
, name
= line
.split(maxsplit
=2)
1520 log
.warning("Could not parse line: %s" % line
)
1523 self
.db
.execute("INSERT INTO countries(country_code, name, continent_code) \
1524 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code
, name
, continent_code
)
1527 def split_line(line
):
1528 key
, colon
, val
= line
.partition(":")
1530 # Strip any excess space
1537 # Run the command line interface