]> git.ipfire.org Git - location/libloc.git/blob - src/python/location-importer.in
location-import.in: optimise regular expression for filtering ASN allocations to...
[location/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import json
23 import logging
24 import math
25 import re
26 import socket
27 import sys
28 import telnetlib
29
30 # Load our location module
31 import location
32 import location.database
33 import location.importer
34 from location.i18n import _
35
36 # Initialise logging
37 log = logging.getLogger("location.importer")
38 log.propagate = 1
39
40 class CLI(object):
41 def parse_cli(self):
42 parser = argparse.ArgumentParser(
43 description=_("Location Importer Command Line Interface"),
44 )
45 subparsers = parser.add_subparsers()
46
47 # Global configuration flags
48 parser.add_argument("--debug", action="store_true",
49 help=_("Enable debug output"))
50 parser.add_argument("--quiet", action="store_true",
51 help=_("Enable quiet mode"))
52
53 # version
54 parser.add_argument("--version", action="version",
55 version="%(prog)s @VERSION@")
56
57 # Database
58 parser.add_argument("--database-host", required=True,
59 help=_("Database Hostname"), metavar=_("HOST"))
60 parser.add_argument("--database-name", required=True,
61 help=_("Database Name"), metavar=_("NAME"))
62 parser.add_argument("--database-username", required=True,
63 help=_("Database Username"), metavar=_("USERNAME"))
64 parser.add_argument("--database-password", required=True,
65 help=_("Database Password"), metavar=_("PASSWORD"))
66
67 # Write Database
68 write = subparsers.add_parser("write", help=_("Write database to file"))
69 write.set_defaults(func=self.handle_write)
70 write.add_argument("file", nargs=1, help=_("Database File"))
71 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
72 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
73 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
74 write.add_argument("--description", nargs="?", help=_("Sets a description"))
75 write.add_argument("--license", nargs="?", help=_("Sets the license"))
76 write.add_argument("--version", type=int, help=_("Database Format Version"))
77
78 # Update WHOIS
79 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
80 update_whois.set_defaults(func=self.handle_update_whois)
81
82 # Update announcements
83 update_announcements = subparsers.add_parser("update-announcements",
84 help=_("Update BGP Annoucements"))
85 update_announcements.set_defaults(func=self.handle_update_announcements)
86 update_announcements.add_argument("server", nargs=1,
87 help=_("Route Server to connect to"), metavar=_("SERVER"))
88
89 # Update overrides
90 update_overrides = subparsers.add_parser("update-overrides",
91 help=_("Update overrides"),
92 )
93 update_overrides.add_argument(
94 "files", nargs="+", help=_("Files to import"),
95 )
96 update_overrides.set_defaults(func=self.handle_update_overrides)
97
98 # Import countries
99 import_countries = subparsers.add_parser("import-countries",
100 help=_("Import countries"),
101 )
102 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
103 help=_("File to import"))
104 import_countries.set_defaults(func=self.handle_import_countries)
105
106 args = parser.parse_args()
107
108 # Configure logging
109 if args.debug:
110 location.logger.set_level(logging.DEBUG)
111 elif args.quiet:
112 location.logger.set_level(logging.WARNING)
113
114 # Print usage if no action was given
115 if not "func" in args:
116 parser.print_usage()
117 sys.exit(2)
118
119 return args
120
121 def run(self):
122 # Parse command line arguments
123 args = self.parse_cli()
124
125 # Initialise database
126 self.db = self._setup_database(args)
127
128 # Call function
129 ret = args.func(args)
130
131 # Return with exit code
132 if ret:
133 sys.exit(ret)
134
135 # Otherwise just exit
136 sys.exit(0)
137
138 def _setup_database(self, ns):
139 """
140 Initialise the database
141 """
142 # Connect to database
143 db = location.database.Connection(
144 host=ns.database_host, database=ns.database_name,
145 user=ns.database_username, password=ns.database_password,
146 )
147
148 with db.transaction():
149 db.execute("""
150 -- announcements
151 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
152 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
153 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
154 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
155 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
156 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
157
158 -- autnums
159 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
160 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text NOT NULL;
161 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
162
163 -- countries
164 CREATE TABLE IF NOT EXISTS countries(
165 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
166 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
167
168 -- networks
169 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
170 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
171 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL;
172 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
173 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
174 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
175
176 -- overrides
177 CREATE TABLE IF NOT EXISTS autnum_overrides(
178 number bigint NOT NULL,
179 name text,
180 country text,
181 is_anonymous_proxy boolean,
182 is_satellite_provider boolean,
183 is_anycast boolean
184 );
185 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
186 ON autnum_overrides(number);
187 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
188 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
189
190 CREATE TABLE IF NOT EXISTS network_overrides(
191 network inet NOT NULL,
192 country text,
193 is_anonymous_proxy boolean,
194 is_satellite_provider boolean,
195 is_anycast boolean
196 );
197 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
198 ON network_overrides(network);
199 CREATE INDEX IF NOT EXISTS network_overrides_search
200 ON network_overrides USING GIST(network inet_ops);
201 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
202 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
203 """)
204
205 return db
206
207 def handle_write(self, ns):
208 """
209 Compiles a database in libloc format out of what is in the database
210 """
211 # Allocate a writer
212 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
213
214 # Set all metadata
215 if ns.vendor:
216 writer.vendor = ns.vendor
217
218 if ns.description:
219 writer.description = ns.description
220
221 if ns.license:
222 writer.license = ns.license
223
224 # Add all Autonomous Systems
225 log.info("Writing Autonomous Systems...")
226
227 # Select all ASes with a name
228 rows = self.db.query("""
229 SELECT
230 autnums.number AS number,
231 COALESCE(
232 (SELECT overrides.name FROM autnum_overrides overrides
233 WHERE overrides.number = autnums.number),
234 autnums.name
235 ) AS name
236 FROM autnums
237 WHERE name <> %s ORDER BY number
238 """, "")
239
240 for row in rows:
241 a = writer.add_as(row.number)
242 a.name = row.name
243
244 # Add all networks
245 log.info("Writing networks...")
246
247 # Select all known networks
248 rows = self.db.query("""
249 -- Return a list of those networks enriched with all
250 -- other information that we store in the database
251 SELECT
252 DISTINCT ON (network)
253 network,
254 autnum,
255
256 -- Country
257 COALESCE(
258 (
259 SELECT country FROM network_overrides overrides
260 WHERE networks.network <<= overrides.network
261 ORDER BY masklen(overrides.network) DESC
262 LIMIT 1
263 ),
264 (
265 SELECT country FROM autnum_overrides overrides
266 WHERE networks.autnum = overrides.number
267 ),
268 networks.country
269 ) AS country,
270
271 -- Flags
272 COALESCE(
273 (
274 SELECT is_anonymous_proxy FROM network_overrides overrides
275 WHERE networks.network <<= overrides.network
276 ORDER BY masklen(overrides.network) DESC
277 LIMIT 1
278 ),
279 (
280 SELECT is_anonymous_proxy FROM autnum_overrides overrides
281 WHERE networks.autnum = overrides.number
282 ),
283 FALSE
284 ) AS is_anonymous_proxy,
285 COALESCE(
286 (
287 SELECT is_satellite_provider FROM network_overrides overrides
288 WHERE networks.network <<= overrides.network
289 ORDER BY masklen(overrides.network) DESC
290 LIMIT 1
291 ),
292 (
293 SELECT is_satellite_provider FROM autnum_overrides overrides
294 WHERE networks.autnum = overrides.number
295 ),
296 FALSE
297 ) AS is_satellite_provider,
298 COALESCE(
299 (
300 SELECT is_anycast FROM network_overrides overrides
301 WHERE networks.network <<= overrides.network
302 ORDER BY masklen(overrides.network) DESC
303 LIMIT 1
304 ),
305 (
306 SELECT is_anycast FROM autnum_overrides overrides
307 WHERE networks.autnum = overrides.number
308 ),
309 FALSE
310 ) AS is_anycast,
311 COALESCE(
312 (
313 SELECT is_drop FROM network_overrides overrides
314 WHERE networks.network <<= overrides.network
315 ORDER BY masklen(overrides.network) DESC
316 LIMIT 1
317 ),
318 (
319 SELECT is_drop FROM autnum_overrides overrides
320 WHERE networks.autnum = overrides.number
321 ),
322 FALSE
323 ) AS is_drop
324 FROM (
325 SELECT
326 known_networks.network AS network,
327 announcements.autnum AS autnum,
328 networks.country AS country,
329
330 -- Must be part of returned values for ORDER BY clause
331 masklen(announcements.network) AS sort_a,
332 masklen(networks.network) AS sort_b
333 FROM (
334 SELECT network FROM announcements
335 UNION ALL
336 SELECT network FROM networks
337 UNION ALL
338 SELECT network FROM network_overrides
339 ) known_networks
340 LEFT JOIN
341 announcements ON known_networks.network <<= announcements.network
342 LEFT JOIN
343 networks ON known_networks.network <<= networks.network
344 ORDER BY
345 known_networks.network,
346 sort_a DESC,
347 sort_b DESC
348 ) networks
349 """)
350
351 for row in rows:
352 network = writer.add_network(row.network)
353
354 # Save country
355 if row.country:
356 network.country_code = row.country
357
358 # Save ASN
359 if row.autnum:
360 network.asn = row.autnum
361
362 # Set flags
363 if row.is_anonymous_proxy:
364 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
365
366 if row.is_satellite_provider:
367 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
368
369 if row.is_anycast:
370 network.set_flag(location.NETWORK_FLAG_ANYCAST)
371
372 if row.is_drop:
373 network.set_flag(location.NETWORK_FLAG_DROP)
374
375 # Add all countries
376 log.info("Writing countries...")
377 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
378
379 for row in rows:
380 c = writer.add_country(row.country_code)
381 c.continent_code = row.continent_code
382 c.name = row.name
383
384 # Write everything to file
385 log.info("Writing database to file...")
386 for file in ns.file:
387 writer.write(file)
388
389 def handle_update_whois(self, ns):
390 downloader = location.importer.Downloader()
391
392 # Download all sources
393 with self.db.transaction():
394 # Create some temporary tables to store parsed data
395 self.db.execute("""
396 CREATE TEMPORARY TABLE _autnums(number integer, organization text, source text NOT NULL)
397 ON COMMIT DROP;
398 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
399
400 CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL, source text NOT NULL)
401 ON COMMIT DROP;
402 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
403
404 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
405 ON COMMIT DROP;
406 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
407 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
408 """)
409
410 # Remove all previously imported content
411 self.db.execute("""
412 TRUNCATE TABLE networks;
413 """)
414
415 # Fetch all valid country codes to check parsed networks aganist...
416 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
417 validcountries = []
418
419 for row in rows:
420 validcountries.append(row.country_code)
421
422 for source_key in location.importer.WHOIS_SOURCES:
423 for single_url in location.importer.WHOIS_SOURCES[source_key]:
424 with downloader.request(single_url, return_blocks=True) as f:
425 for block in f:
426 self._parse_block(block, source_key, validcountries)
427
428 # Process all parsed networks from every RIR we happen to have access to,
429 # insert the largest network chunks into the networks table immediately...
430 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
431
432 for family in (row.family for row in families):
433 smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
434
435 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
436 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
437
438 # ... determine any other prefixes for this network family, ...
439 prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
440 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
441
442 # ... and insert networks with this prefix in case they provide additional
443 # information (i. e. subnet of a larger chunk with a different country)
444 for prefix in (row.prefix for row in prefixes):
445 self.db.execute("""
446 WITH candidates AS (
447 SELECT
448 _rirdata.network,
449 _rirdata.country,
450 _rirdata.original_countries,
451 _rirdata.source
452 FROM
453 _rirdata
454 WHERE
455 family(_rirdata.network) = %s
456 AND
457 masklen(_rirdata.network) = %s
458 ),
459 filtered AS (
460 SELECT
461 DISTINCT ON (c.network)
462 c.network,
463 c.country,
464 c.original_countries,
465 c.source,
466 masklen(networks.network),
467 networks.country AS parent_country
468 FROM
469 candidates c
470 LEFT JOIN
471 networks
472 ON
473 c.network << networks.network
474 ORDER BY
475 c.network,
476 masklen(networks.network) DESC NULLS LAST
477 )
478 INSERT INTO
479 networks(network, country, original_countries, source)
480 SELECT
481 network,
482 country,
483 original_countries,
484 source
485 FROM
486 filtered
487 WHERE
488 parent_country IS NULL
489 OR
490 country <> parent_country
491 ON CONFLICT DO NOTHING""",
492 family, prefix,
493 )
494
495 self.db.execute("""
496 INSERT INTO autnums(number, name, source)
497 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
498 JOIN _organizations ON _autnums.organization = _organizations.handle
499 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
500 """)
501
502 # Download all extended sources
503 for source_key in location.importer.EXTENDED_SOURCES:
504 for single_url in location.importer.EXTENDED_SOURCES[source_key]:
505 with self.db.transaction():
506 # Download data
507 with downloader.request(single_url) as f:
508 for line in f:
509 self._parse_line(line, source_key, validcountries)
510
511 # Download and import (technical) AS names from ARIN
512 self._import_as_names_from_arin()
513
514 def _check_parsed_network(self, network):
515 """
516 Assistive function to detect and subsequently sort out parsed
517 networks from RIR data (both Whois and so-called "extended sources"),
518 which are or have...
519
520 (a) not globally routable (RFC 1918 space, et al.)
521 (b) covering a too large chunk of the IP address space (prefix length
522 is < 7 for IPv4 networks, and < 10 for IPv6)
523 (c) "0.0.0.0" or "::" as a network address
524 (d) are too small for being publicly announced (we have decided not to
525 process them at the moment, as they significantly enlarge our
526 database without providing very helpful additional information)
527
528 This unfortunately is necessary due to brain-dead clutter across
529 various RIR databases, causing mismatches and eventually disruptions.
530
531 We will return False in case a network is not suitable for adding
532 it to our database, and True otherwise.
533 """
534
535 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
536 return False
537
538 if not network.is_global:
539 log.debug("Skipping non-globally routable network: %s" % network)
540 return False
541
542 if network.version == 4:
543 if network.prefixlen < 7:
544 log.debug("Skipping too big IP chunk: %s" % network)
545 return False
546
547 if network.prefixlen > 24:
548 log.debug("Skipping network too small to be publicly announced: %s" % network)
549 return False
550
551 if str(network.network_address) == "0.0.0.0":
552 log.debug("Skipping network based on 0.0.0.0: %s" % network)
553 return False
554
555 elif network.version == 6:
556 if network.prefixlen < 10:
557 log.debug("Skipping too big IP chunk: %s" % network)
558 return False
559
560 if network.prefixlen > 48:
561 log.debug("Skipping network too small to be publicly announced: %s" % network)
562 return False
563
564 if str(network.network_address) == "::":
565 log.debug("Skipping network based on '::': %s" % network)
566 return False
567
568 else:
569 # This should not happen...
570 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
571 return False
572
573 # In case we have made it here, the network is considered to
574 # be suitable for libloc consumption...
575 return True
576
577 def _parse_block(self, block, source_key, validcountries = None):
578 # Get first line to find out what type of block this is
579 line = block[0]
580
581 # aut-num
582 if line.startswith("aut-num:"):
583 return self._parse_autnum_block(block, source_key)
584
585 # inetnum
586 if line.startswith("inet6num:") or line.startswith("inetnum:"):
587 return self._parse_inetnum_block(block, source_key, validcountries)
588
589 # organisation
590 elif line.startswith("organisation:"):
591 return self._parse_org_block(block, source_key)
592
593 def _parse_autnum_block(self, block, source_key):
594 autnum = {}
595 for line in block:
596 # Split line
597 key, val = split_line(line)
598
599 if key == "aut-num":
600 m = re.match(r"^(AS|as)(\d+)", val)
601 if m:
602 autnum["asn"] = m.group(2)
603
604 elif key == "org":
605 autnum[key] = val.upper()
606
607 # Skip empty objects
608 if not autnum:
609 return
610
611 # Insert into database
612 self.db.execute("INSERT INTO _autnums(number, organization, source) \
613 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
614 organization = excluded.organization",
615 autnum.get("asn"), autnum.get("org"), source_key,
616 )
617
618 def _parse_inetnum_block(self, block, source_key, validcountries = None):
619 log.debug("Parsing inetnum block:")
620
621 inetnum = {}
622 for line in block:
623 log.debug(line)
624
625 # Split line
626 key, val = split_line(line)
627
628 # Filter any inetnum records which are only referring to IP space
629 # not managed by that specific RIR...
630 if key == "netname":
631 if re.match(r"(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK)", val.strip()):
632 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
633 return
634
635 if key == "inetnum":
636 start_address, delim, end_address = val.partition("-")
637
638 # Strip any excess space
639 start_address, end_address = start_address.rstrip(), end_address.strip()
640
641 # Convert to IP address
642 try:
643 start_address = ipaddress.ip_address(start_address)
644 end_address = ipaddress.ip_address(end_address)
645 except ValueError:
646 log.warning("Could not parse line: %s" % line)
647 return
648
649 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
650
651 elif key == "inet6num":
652 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
653
654 elif key == "country":
655 val = val.upper()
656
657 # Catch RIR data objects with more than one country code...
658 if not key in inetnum:
659 inetnum[key] = []
660 else:
661 if val in inetnum.get("country"):
662 # ... but keep this list distinct...
663 continue
664
665 inetnum[key].append(val)
666
667 # Skip empty objects
668 if not inetnum or not "country" in inetnum:
669 return
670
671 # Prepare skipping objects with unknown country codes...
672 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
673
674 # Iterate through all networks enumerated from above, check them for plausibility and insert
675 # them into the database, if _check_parsed_network() succeeded
676 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
677 if self._check_parsed_network(single_network):
678
679 # Skip objects with unknown country codes if they are valid to avoid log spam...
680 if validcountries and invalidcountries:
681 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
682 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
683
684 # Everything is fine here, run INSERT statement...
685 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
686 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
687 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
688 )
689
690 def _parse_org_block(self, block, source_key):
691 org = {}
692 for line in block:
693 # Split line
694 key, val = split_line(line)
695
696 if key == "organisation":
697 org[key] = val.upper()
698 elif key == "org-name":
699 org[key] = val
700
701 # Skip empty objects
702 if not org:
703 return
704
705 self.db.execute("INSERT INTO _organizations(handle, name, source) \
706 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
707 UPDATE SET name = excluded.name",
708 org.get("organisation"), org.get("org-name"), source_key,
709 )
710
711 def _parse_line(self, line, source_key, validcountries = None):
712 # Skip version line
713 if line.startswith("2"):
714 return
715
716 # Skip comments
717 if line.startswith("#"):
718 return
719
720 try:
721 registry, country_code, type, line = line.split("|", 3)
722 except:
723 log.warning("Could not parse line: %s" % line)
724 return
725
726 # Skip any lines that are for stats only or do not have a country
727 # code at all (avoids log spam below)
728 if not country_code or country_code == '*':
729 return
730
731 # Skip objects with unknown country codes
732 if validcountries and country_code not in validcountries:
733 log.warning("Skipping line with bogus country '%s': %s" % \
734 (country_code, line))
735 return
736
737 if type in ("ipv6", "ipv4"):
738 return self._parse_ip_line(country_code, type, line, source_key)
739
740 def _parse_ip_line(self, country, type, line, source_key):
741 try:
742 address, prefix, date, status, organization = line.split("|")
743 except ValueError:
744 organization = None
745
746 # Try parsing the line without organization
747 try:
748 address, prefix, date, status = line.split("|")
749 except ValueError:
750 log.warning("Unhandled line format: %s" % line)
751 return
752
753 # Skip anything that isn't properly assigned
754 if not status in ("assigned", "allocated"):
755 return
756
757 # Cast prefix into an integer
758 try:
759 prefix = int(prefix)
760 except:
761 log.warning("Invalid prefix: %s" % prefix)
762 return
763
764 # Fix prefix length for IPv4
765 if type == "ipv4":
766 prefix = 32 - int(math.log(prefix, 2))
767
768 # Try to parse the address
769 try:
770 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
771 except ValueError:
772 log.warning("Invalid IP address: %s" % address)
773 return
774
775 if not self._check_parsed_network(network):
776 return
777
778 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
779 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
780 UPDATE SET country = excluded.country",
781 "%s" % network, country, [country], source_key,
782 )
783
784 def _import_as_names_from_arin(self):
785 downloader = location.importer.Downloader()
786
787 # XXX: Download AS names file from ARIN (note that these names appear to be quite
788 # technical, not intended for human consumption, as description fields in
789 # organisation handles for other RIRs are - however, this is what we have got,
790 # and in some cases, it might be still better than nothing)
791 with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
792 for line in f:
793 # Convert binary line to string...
794 line = str(line)
795
796 # ... valid lines start with a space, followed by the number of the Autonomous System ...
797 if not line.startswith(" "):
798 continue
799
800 # Split line and check if there is a valid ASN in it...
801 asn, name = line.split()[0:2]
802
803 try:
804 asn = int(asn)
805 except ValueError:
806 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
807 continue
808
809 if not ((1 <= asn and asn <= 23455) or (23457 <= asn and asn <= 64495) or (131072 <= asn and asn <= 4199999999)):
810 log.debug("Skipping ARIN AS names line not containing a valid ASN: %s" % asn)
811 continue
812
813 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
814 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
815 continue
816
817 # Bail out in case the AS name contains anything we do not expect here...
818 if re.search(r"[^a-zA-Z0-9-_]", name):
819 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
820 (asn, name))
821
822 # Things look good here, run INSERT statement and skip this one if we already have
823 # a (better?) name for this Autonomous System...
824 self.db.execute("""
825 INSERT INTO autnums(
826 number,
827 name,
828 source
829 ) VALUES (%s, %s, %s)
830 ON CONFLICT (number) DO NOTHING""",
831 asn,
832 name,
833 "ARIN",
834 )
835
836 def handle_update_announcements(self, ns):
837 server = ns.server[0]
838
839 with self.db.transaction():
840 if server.startswith("/"):
841 self._handle_update_announcements_from_bird(server)
842 else:
843 self._handle_update_announcements_from_telnet(server)
844
845 # Purge anything we never want here
846 self.db.execute("""
847 -- Delete default routes
848 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
849
850 -- Delete anything that is not global unicast address space
851 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
852
853 -- DELETE "current network" address space
854 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
855
856 -- DELETE local loopback address space
857 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
858
859 -- DELETE RFC 1918 address space
860 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
861 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
862 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
863
864 -- DELETE test, benchmark and documentation address space
865 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
866 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
867 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
868 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
869 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
870
871 -- DELETE CGNAT address space (RFC 6598)
872 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
873
874 -- DELETE link local address space
875 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
876
877 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
878 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
879 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
880
881 -- DELETE multicast and reserved address space
882 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
883 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
884
885 -- Delete networks that are too small to be in the global routing table
886 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
887 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
888
889 -- Delete any non-public or reserved ASNs
890 DELETE FROM announcements WHERE NOT (
891 (autnum >= 1 AND autnum <= 23455)
892 OR
893 (autnum >= 23457 AND autnum <= 64495)
894 OR
895 (autnum >= 131072 AND autnum <= 4199999999)
896 );
897
898 -- Delete everything that we have not seen for 14 days
899 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
900 """)
901
902 def _handle_update_announcements_from_bird(self, server):
903 # Pre-compile the regular expression for faster searching
904 route = re.compile(b"^\s(.+?)\s+.+?\[AS(.*?).\]$")
905
906 log.info("Requesting routing table from Bird (%s)" % server)
907
908 # Send command to list all routes
909 for line in self._bird_cmd(server, "show route"):
910 m = route.match(line)
911 if not m:
912 log.debug("Could not parse line: %s" % line.decode())
913 continue
914
915 # Fetch the extracted network and ASN
916 network, autnum = m.groups()
917
918 # Insert it into the database
919 self.db.execute("INSERT INTO announcements(network, autnum) \
920 VALUES(%s, %s) ON CONFLICT (network) DO \
921 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
922 network.decode(), autnum.decode(),
923 )
924
925 def _handle_update_announcements_from_telnet(self, server):
926 # Pre-compile regular expression for routes
927 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
928
929 with telnetlib.Telnet(server) as t:
930 # Enable debug mode
931 #if ns.debug:
932 # t.set_debuglevel(10)
933
934 # Wait for console greeting
935 greeting = t.read_until(b"> ", timeout=30)
936 if not greeting:
937 log.error("Could not get a console prompt")
938 return 1
939
940 # Disable pagination
941 t.write(b"terminal length 0\n")
942
943 # Wait for the prompt to return
944 t.read_until(b"> ")
945
946 # Fetch the routing tables
947 for protocol in ("ipv6", "ipv4"):
948 log.info("Requesting %s routing table" % protocol)
949
950 # Request the full unicast routing table
951 t.write(b"show bgp %s unicast\n" % protocol.encode())
952
953 # Read entire header which ends with "Path"
954 t.read_until(b"Path\r\n")
955
956 while True:
957 # Try reading a full entry
958 # Those might be broken across multiple lines but ends with i
959 line = t.read_until(b"i\r\n", timeout=5)
960 if not line:
961 break
962
963 # Show line for debugging
964 #log.debug(repr(line))
965
966 # Try finding a route in here
967 m = route.match(line)
968 if m:
969 network, autnum = m.groups()
970
971 # Convert network to string
972 network = network.decode()
973
974 # Append /24 for IPv4 addresses
975 if not "/" in network and not ":" in network:
976 network = "%s/24" % network
977
978 # Convert AS number to integer
979 autnum = int(autnum)
980
981 log.info("Found announcement for %s by %s" % (network, autnum))
982
983 self.db.execute("INSERT INTO announcements(network, autnum) \
984 VALUES(%s, %s) ON CONFLICT (network) DO \
985 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
986 network, autnum,
987 )
988
989 log.info("Finished reading the %s routing table" % protocol)
990
991 def _bird_cmd(self, socket_path, command):
992 # Connect to the socket
993 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
994 s.connect(socket_path)
995
996 # Allocate some buffer
997 buffer = b""
998
999 # Send the command
1000 s.send(b"%s\n" % command.encode())
1001
1002 while True:
1003 # Fill up the buffer
1004 buffer += s.recv(4096)
1005
1006 while True:
1007 # Search for the next newline
1008 pos = buffer.find(b"\n")
1009
1010 # If we cannot find one, we go back and read more data
1011 if pos <= 0:
1012 break
1013
1014 # Cut after the newline character
1015 pos += 1
1016
1017 # Split the line we want and keep the rest in buffer
1018 line, buffer = buffer[:pos], buffer[pos:]
1019
1020 # Look for the end-of-output indicator
1021 if line == b"0000 \n":
1022 return
1023
1024 # Otherwise return the line
1025 yield line
1026
1027 def handle_update_overrides(self, ns):
1028 with self.db.transaction():
1029 # Drop all data that we have
1030 self.db.execute("""
1031 TRUNCATE TABLE autnum_overrides;
1032 TRUNCATE TABLE network_overrides;
1033 """)
1034
1035 # Update overrides for various cloud providers big enough to publish their own IP
1036 # network allocation lists in a machine-readable format...
1037 self._update_overrides_for_aws()
1038
1039 for file in ns.files:
1040 log.info("Reading %s..." % file)
1041
1042 with open(file, "rb") as f:
1043 for type, block in location.importer.read_blocks(f):
1044 if type == "net":
1045 network = block.get("net")
1046 # Try to parse and normalise the network
1047 try:
1048 network = ipaddress.ip_network(network, strict=False)
1049 except ValueError as e:
1050 log.warning("Invalid IP network: %s: %s" % (network, e))
1051 continue
1052
1053 # Prevent that we overwrite all networks
1054 if network.prefixlen == 0:
1055 log.warning("Skipping %s: You cannot overwrite default" % network)
1056 continue
1057
1058 self.db.execute("""
1059 INSERT INTO network_overrides(
1060 network,
1061 country,
1062 source,
1063 is_anonymous_proxy,
1064 is_satellite_provider,
1065 is_anycast,
1066 is_drop
1067 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1068 ON CONFLICT (network) DO NOTHING""",
1069 "%s" % network,
1070 block.get("country"),
1071 "manual",
1072 self._parse_bool(block, "is-anonymous-proxy"),
1073 self._parse_bool(block, "is-satellite-provider"),
1074 self._parse_bool(block, "is-anycast"),
1075 self._parse_bool(block, "drop"),
1076 )
1077
1078 elif type == "aut-num":
1079 autnum = block.get("aut-num")
1080
1081 # Check if AS number begins with "AS"
1082 if not autnum.startswith("AS"):
1083 log.warning("Invalid AS number: %s" % autnum)
1084 continue
1085
1086 # Strip "AS"
1087 autnum = autnum[2:]
1088
1089 self.db.execute("""
1090 INSERT INTO autnum_overrides(
1091 number,
1092 name,
1093 country,
1094 source,
1095 is_anonymous_proxy,
1096 is_satellite_provider,
1097 is_anycast,
1098 is_drop
1099 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1100 ON CONFLICT DO NOTHING""",
1101 autnum,
1102 block.get("name"),
1103 block.get("country"),
1104 "manual",
1105 self._parse_bool(block, "is-anonymous-proxy"),
1106 self._parse_bool(block, "is-satellite-provider"),
1107 self._parse_bool(block, "is-anycast"),
1108 self._parse_bool(block, "drop"),
1109 )
1110
1111 else:
1112 log.warning("Unsupported type: %s" % type)
1113
1114 def _update_overrides_for_aws(self):
1115 # Download Amazon AWS IP allocation file to create overrides...
1116 downloader = location.importer.Downloader()
1117
1118 try:
1119 with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
1120 aws_ip_dump = json.load(f.body)
1121 except Exception as e:
1122 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1123 return
1124
1125 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1126 # there seems to be no machine-readable version available of this other than
1127 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1128 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1129 # was helpful here as well.
1130 aws_region_country_map = {
1131 "af-south-1": "ZA",
1132 "ap-east-1": "HK",
1133 "ap-south-1": "IN",
1134 "ap-south-2": "IN",
1135 "ap-northeast-3": "JP",
1136 "ap-northeast-2": "KR",
1137 "ap-southeast-1": "SG",
1138 "ap-southeast-2": "AU",
1139 "ap-southeast-3": "MY",
1140 "ap-southeast-4": "AU",
1141 "ap-northeast-1": "JP",
1142 "ca-central-1": "CA",
1143 "eu-central-1": "DE",
1144 "eu-central-2": "CH",
1145 "eu-west-1": "IE",
1146 "eu-west-2": "GB",
1147 "eu-south-1": "IT",
1148 "eu-south-2": "ES",
1149 "eu-west-3": "FR",
1150 "eu-north-1": "SE",
1151 "me-central-1": "AE",
1152 "me-south-1": "BH",
1153 "sa-east-1": "BR"
1154 }
1155
1156 # Fetch all valid country codes to check parsed networks aganist...
1157 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1158 validcountries = []
1159
1160 for row in rows:
1161 validcountries.append(row.country_code)
1162
1163 with self.db.transaction():
1164 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1165 try:
1166 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1167 except ValueError:
1168 log.warning("Unable to parse line: %s" % snetwork)
1169 continue
1170
1171 # Sanitize parsed networks...
1172 if not self._check_parsed_network(network):
1173 continue
1174
1175 # Determine region of this network...
1176 region = snetwork["region"]
1177 cc = None
1178 is_anycast = False
1179
1180 # Any region name starting with "us-" will get "US" country code assigned straight away...
1181 if region.startswith("us-"):
1182 cc = "US"
1183 elif region.startswith("cn-"):
1184 # ... same goes for China ...
1185 cc = "CN"
1186 elif region == "GLOBAL":
1187 # ... funny region name for anycast-like networks ...
1188 is_anycast = True
1189 elif region in aws_region_country_map:
1190 # ... assign looked up country code otherwise ...
1191 cc = aws_region_country_map[region]
1192 else:
1193 # ... and bail out if we are missing something here
1194 log.warning("Unable to determine country code for line: %s" % snetwork)
1195 continue
1196
1197 # Skip networks with unknown country codes
1198 if not is_anycast and validcountries and cc not in validcountries:
1199 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1200 (cc, network))
1201 return
1202
1203 # Conduct SQL statement...
1204 self.db.execute("""
1205 INSERT INTO network_overrides(
1206 network,
1207 country,
1208 source,
1209 is_anonymous_proxy,
1210 is_satellite_provider,
1211 is_anycast
1212 ) VALUES (%s, %s, %s, %s, %s, %s)
1213 ON CONFLICT (network) DO NOTHING""",
1214 "%s" % network,
1215 cc,
1216 "Amazon AWS IP feed",
1217 None,
1218 None,
1219 is_anycast,
1220 )
1221
1222
1223 @staticmethod
1224 def _parse_bool(block, key):
1225 val = block.get(key)
1226
1227 # There is no point to proceed when we got None
1228 if val is None:
1229 return
1230
1231 # Convert to lowercase
1232 val = val.lower()
1233
1234 # True
1235 if val in ("yes", "1"):
1236 return True
1237
1238 # False
1239 if val in ("no", "0"):
1240 return False
1241
1242 # Default to None
1243 return None
1244
1245 def handle_import_countries(self, ns):
1246 with self.db.transaction():
1247 # Drop all data that we have
1248 self.db.execute("TRUNCATE TABLE countries")
1249
1250 for file in ns.file:
1251 for line in file:
1252 line = line.rstrip()
1253
1254 # Ignore any comments
1255 if line.startswith("#"):
1256 continue
1257
1258 try:
1259 country_code, continent_code, name = line.split(maxsplit=2)
1260 except:
1261 log.warning("Could not parse line: %s" % line)
1262 continue
1263
1264 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1265 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1266
1267
1268 def split_line(line):
1269 key, colon, val = line.partition(":")
1270
1271 # Strip any excess space
1272 key = key.strip()
1273 val = val.strip()
1274
1275 return key, val
1276
1277 def main():
1278 # Run the command line interface
1279 c = CLI()
1280 c.run()
1281
1282 main()