]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location-importer.in
location-importer.in: track original countries more pythonic
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import socket
26 import sys
27 import telnetlib
28
29 # Load our location module
30 import location
31 import location.database
32 import location.importer
33 from location.i18n import _
34
35 # Initialise logging
36 log = logging.getLogger("location.importer")
37 log.propagate = 1
38
39 class CLI(object):
def parse_cli(self):
    """
    Build the argument parser, parse the command line and return the
    resulting namespace.

    Logging is reconfigured according to --debug/--quiet. If no
    sub-command was selected, the usage is printed and the process
    exits with code 2.
    """
    root = argparse.ArgumentParser(
        description=_("Location Importer Command Line Interface"),
    )
    commands = root.add_subparsers()

    # Flags shared by all commands
    root.add_argument("--debug", action="store_true",
        help=_("Enable debug output"))
    root.add_argument("--quiet", action="store_true",
        help=_("Enable quiet mode"))

    # Version of this program
    root.add_argument("--version", action="version",
        version="%(prog)s @VERSION@")

    # Database credentials (all of them are mandatory)
    database_options = (
        ("--database-host", _("Database Hostname"), _("HOST")),
        ("--database-name", _("Database Name"), _("NAME")),
        ("--database-username", _("Database Username"), _("USERNAME")),
        ("--database-password", _("Database Password"), _("PASSWORD")),
    )
    for flag, text, placeholder in database_options:
        root.add_argument(flag, required=True, help=text, metavar=placeholder)

    # write
    cmd_write = commands.add_parser("write", help=_("Write database to file"))
    cmd_write.set_defaults(func=self.handle_write)
    cmd_write.add_argument("file", nargs=1, help=_("Database File"))

    # Both keys are opened for reading right away by argparse
    for flag, text in (
        ("--signing-key", _("Signing Key")),
        ("--backup-signing-key", _("Backup Signing Key")),
    ):
        cmd_write.add_argument(flag, nargs="?", type=open, help=text)

    # Free-form metadata
    for flag, text in (
        ("--vendor", _("Sets the vendor")),
        ("--description", _("Sets a description")),
        ("--license", _("Sets the license")),
    ):
        cmd_write.add_argument(flag, nargs="?", help=text)

    cmd_write.add_argument("--version", type=int, help=_("Database Format Version"))

    # update-whois
    cmd_whois = commands.add_parser("update-whois", help=_("Update WHOIS Information"))
    cmd_whois.set_defaults(func=self.handle_update_whois)

    # update-announcements
    cmd_announcements = commands.add_parser("update-announcements",
        help=_("Update BGP Annoucements"))
    cmd_announcements.set_defaults(func=self.handle_update_announcements)
    cmd_announcements.add_argument("server", nargs=1,
        help=_("Route Server to connect to"), metavar=_("SERVER"))

    # update-overrides
    cmd_overrides = commands.add_parser("update-overrides",
        help=_("Update overrides"),
    )
    cmd_overrides.add_argument(
        "files", nargs="+", help=_("Files to import"),
    )
    cmd_overrides.set_defaults(func=self.handle_update_overrides)

    # import-countries
    cmd_countries = commands.add_parser("import-countries",
        help=_("Import countries"),
    )
    cmd_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
        help=_("File to import"))
    cmd_countries.set_defaults(func=self.handle_import_countries)

    parsed = root.parse_args()

    # Adjust the log level
    if parsed.debug:
        location.logger.set_level(logging.DEBUG)
    elif parsed.quiet:
        location.logger.set_level(logging.WARNING)

    # Without a sub-command there is nothing we could do
    if "func" not in parsed:
        root.print_usage()
        sys.exit(2)

    return parsed
119
def run(self):
    """
    Parse the command line, set up the database and run the selected
    command.

    This never returns: the process exits with the command's return
    value if that is truthy, and with zero otherwise.
    """
    ns = self.parse_cli()

    # The handlers access the database through self.db
    self.db = self._setup_database(ns)

    # Dispatch to the handler that was registered for the sub-command
    status = ns.func(ns)

    sys.exit(status if status else 0)
136
def _setup_database(self, ns):
    """
    Connect to the database and make sure the schema exists.

    The connection parameters are taken from the parsed command line
    (ns). All statements are idempotent (IF NOT EXISTS) and are run in
    one transaction. Returns the connection.
    """
    connection = location.database.Connection(
        host=ns.database_host, database=ns.database_name,
        user=ns.database_username, password=ns.database_password,
    )

    schema = """
        -- announcements
        CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
            first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
            last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
        CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
        CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
        CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

        -- autnums
        CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
        ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text NOT NULL;
        CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

        -- countries
        CREATE TABLE IF NOT EXISTS countries(
            country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
        CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

        -- networks
        CREATE TABLE IF NOT EXISTS networks(network inet, country text);
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL;
        CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
        CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
        CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

        -- overrides
        CREATE TABLE IF NOT EXISTS autnum_overrides(
            number bigint NOT NULL,
            name text,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
            ON autnum_overrides(number);

        CREATE TABLE IF NOT EXISTS network_overrides(
            network inet NOT NULL,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
            ON network_overrides(network);
        CREATE INDEX IF NOT EXISTS network_overrides_search
            ON network_overrides USING GIST(network inet_ops);
    """

    # Create whatever is missing in a single transaction
    with connection.transaction():
        connection.execute(schema)

    return connection
201
def handle_write(self, ns):
    """
    Compile a database in libloc format from the SQL database.

    Autonomous Systems, networks (with overrides applied) and countries
    are fed into a location.Writer, which is then written to every
    file named on the command line.
    """
    # The writer signs with the keys passed on the command line (if any)
    writer = location.Writer(ns.signing_key, ns.backup_signing_key)

    # Copy over any metadata that has been set
    for field in ("vendor", "description", "license"):
        value = getattr(ns, field)
        if value:
            setattr(writer, field, value)

    # Autonomous Systems
    log.info("Writing Autonomous Systems...")

    # Only ASes with a non-empty name; an override name wins
    autnums = self.db.query("""
        SELECT
            autnums.number AS number,
            COALESCE(
                (SELECT overrides.name FROM autnum_overrides overrides
                    WHERE overrides.number = autnums.number),
                autnums.name
            ) AS name
            FROM autnums
            WHERE name <> %s ORDER BY number
        """, "")

    for entry in autnums:
        autonomous_system = writer.add_as(entry.number)
        autonomous_system.name = entry.name

    # Networks
    log.info("Writing networks...")

    # Every network we know of (announced, from the RIRs or overridden)
    networks = self.db.query("""
        -- Return a list of those networks enriched with all
        -- other information that we store in the database
        SELECT
            DISTINCT ON (network)
            network,
            autnum,

            -- Country
            COALESCE(
                (
                    SELECT country FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT country FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                networks.country
            ) AS country,

            -- Flags
            COALESCE(
                (
                    SELECT is_anonymous_proxy FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anonymous_proxy FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anonymous_proxy,
            COALESCE(
                (
                    SELECT is_satellite_provider FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_satellite_provider FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_satellite_provider,
            COALESCE(
                (
                    SELECT is_anycast FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anycast FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anycast
        FROM (
            SELECT
                known_networks.network AS network,
                announcements.autnum AS autnum,
                networks.country AS country,

                -- Must be part of returned values for ORDER BY clause
                masklen(announcements.network) AS sort_a,
                masklen(networks.network) AS sort_b
            FROM (
                    SELECT network FROM announcements
                UNION ALL
                    SELECT network FROM networks
                UNION ALL
                    SELECT network FROM network_overrides
                ) known_networks
            LEFT JOIN
                announcements ON known_networks.network <<= announcements.network
            LEFT JOIN
                networks ON known_networks.network <<= networks.network
            ORDER BY
                known_networks.network,
                sort_a DESC,
                sort_b DESC
        ) networks
    """)

    # Result column -> flag that is set when the column is true
    flags = (
        ("is_anonymous_proxy", location.NETWORK_FLAG_ANONYMOUS_PROXY),
        ("is_satellite_provider", location.NETWORK_FLAG_SATELLITE_PROVIDER),
        ("is_anycast", location.NETWORK_FLAG_ANYCAST),
    )

    for entry in networks:
        network = writer.add_network(entry.network)

        # Country and ASN are optional
        if entry.country:
            network.country_code = entry.country

        if entry.autnum:
            network.asn = entry.autnum

        for column, flag in flags:
            if getattr(entry, column):
                network.set_flag(flag)

    # Countries
    log.info("Writing countries...")

    for entry in self.db.query("SELECT * FROM countries ORDER BY country_code"):
        country = writer.add_country(entry.country_code)
        country.continent_code = entry.continent_code
        country.name = entry.name

    # Finally write everything out
    log.info("Writing database to file...")
    for path in ns.file:
        writer.write(path)
367
def handle_update_whois(self, ns):
    """
    Re-import all network and AS information from the RIRs.

    The WHOIS sources are parsed into temporary tables and merged into
    "networks" and "autnums" within one transaction. The extended
    sources are imported afterwards, one transaction per URL.
    """
    downloader = location.importer.Downloader()

    with self.db.transaction():
        # Staging tables for the parsed data (dropped on commit)
        self.db.execute("""
            CREATE TEMPORARY TABLE _autnums(number integer, organization text, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

            CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

            CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
            CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
        """)

        # Start over with an empty networks table
        self.db.execute("""
            TRUNCATE TABLE networks;
        """)

        # Collect all known country codes to check parsed networks against
        validcountries = [
            row.country_code
            for row in self.db.query("SELECT * FROM countries ORDER BY country_code")
        ]

        # Parse all WHOIS dumps block by block
        for source_key, urls in location.importer.WHOIS_SOURCES.items():
            for single_url in urls:
                with downloader.request(single_url, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block, source_key, validcountries)

        # Merge the parsed networks per address family, largest networks first
        families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")

        # Inserts all networks of one family and prefix length, unless the
        # closest enclosing network already has the same country
        insert_refining_networks = """
            WITH candidates AS (
                SELECT
                    _rirdata.network,
                    _rirdata.country,
                    _rirdata.original_countries,
                    _rirdata.source
                FROM
                    _rirdata
                WHERE
                    family(_rirdata.network) = %s
                AND
                    masklen(_rirdata.network) = %s
            ),
            filtered AS (
                SELECT
                    DISTINCT ON (c.network)
                    c.network,
                    c.country,
                    c.original_countries,
                    c.source,
                    masklen(networks.network),
                    networks.country AS parent_country
                FROM
                    candidates c
                LEFT JOIN
                    networks
                ON
                    c.network << networks.network
                ORDER BY
                    c.network,
                    masklen(networks.network) DESC NULLS LAST
            )
            INSERT INTO
                networks(network, country, original_countries, source)
            SELECT
                network,
                country,
                original_countries,
                source
            FROM
                filtered
            WHERE
                parent_country IS NULL
            OR
                country <> parent_country
            ON CONFLICT DO NOTHING"""

        for family in (row.family for row in families):
            # The shortest prefix length goes in unconditionally...
            smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)

            self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
                SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)

            # ... followed by all remaining prefix lengths in ascending order
            prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
                WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)

            for prefix in (row.prefix for row in prefixes):
                self.db.execute(insert_refining_networks, family, prefix)

        # Resolve organisation handles into names for all ASes
        self.db.execute("""
            INSERT INTO autnums(number, name, source)
                SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
                    JOIN _organizations ON _autnums.organization = _organizations.handle
            ON CONFLICT (number) DO UPDATE SET name = excluded.name;
        """)

    # Import the extended sources, each URL in a transaction of its own
    for source_key, urls in location.importer.EXTENDED_SOURCES.items():
        for single_url in urls:
            with self.db.transaction():
                with downloader.request(single_url) as f:
                    for line in f:
                        self._parse_line(line, source_key, validcountries)
489
490 def _check_parsed_network(self, network):
491 """
492 Assistive function to detect and subsequently sort out parsed
493 networks from RIR data (both Whois and so-called "extended sources"),
494 which are or have...
495
496 (a) not globally routable (RFC 1918 space, et al.)
497 (b) covering a too large chunk of the IP address space (prefix length
498 is < 7 for IPv4 networks, and < 10 for IPv6)
499 (c) "0.0.0.0" or "::" as a network address
500 (d) are too small for being publicly announced (we have decided not to
501 process them at the moment, as they significantly enlarge our
502 database without providing very helpful additional information)
503
504 This unfortunately is necessary due to brain-dead clutter across
505 various RIR databases, causing mismatches and eventually disruptions.
506
507 We will return False in case a network is not suitable for adding
508 it to our database, and True otherwise.
509 """
510
511 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
512 return False
513
514 if not network.is_global:
515 log.debug("Skipping non-globally routable network: %s" % network)
516 return False
517
518 if network.version == 4:
519 if network.prefixlen < 7:
520 log.debug("Skipping too big IP chunk: %s" % network)
521 return False
522
523 if network.prefixlen > 24:
524 log.debug("Skipping network too small to be publicly announced: %s" % network)
525 return False
526
527 if str(network.network_address) == "0.0.0.0":
528 log.debug("Skipping network based on 0.0.0.0: %s" % network)
529 return False
530
531 elif network.version == 6:
532 if network.prefixlen < 10:
533 log.debug("Skipping too big IP chunk: %s" % network)
534 return False
535
536 if network.prefixlen > 48:
537 log.debug("Skipping network too small to be publicly announced: %s" % network)
538 return False
539
540 if str(network.network_address) == "::":
541 log.debug("Skipping network based on '::': %s" % network)
542 return False
543
544 else:
545 # This should not happen...
546 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
547 return False
548
549 # In case we have made it here, the network is considered to
550 # be suitable for libloc consumption...
551 return True
552
553 def _parse_block(self, block, source_key, validcountries = None):
554 # Get first line to find out what type of block this is
555 line = block[0]
556
557 # aut-num
558 if line.startswith("aut-num:"):
559 return self._parse_autnum_block(block, source_key)
560
561 # inetnum
562 if line.startswith("inet6num:") or line.startswith("inetnum:"):
563 return self._parse_inetnum_block(block, source_key, validcountries)
564
565 # organisation
566 elif line.startswith("organisation:"):
567 return self._parse_org_block(block, source_key)
568
def _parse_autnum_block(self, block, source_key):
    """
    Parse an "aut-num" WHOIS block and stage it in the _autnums table.

    The AS number is taken from the "aut-num" line, the (upper-cased)
    organisation handle from the "org" line. Objects without a
    parsable AS number are skipped, as they would otherwise be stored
    with a NULL number.
    """
    autnum = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "aut-num":
            m = re.match(r"^(AS|as)(\d+)", val)
            if m:
                autnum["asn"] = m.group(2)

        elif key == "org":
            autnum[key] = val.upper()

    # Skip empty objects and anything that does not carry an AS number
    if "asn" not in autnum:
        return

    # Insert into database
    self.db.execute("INSERT INTO _autnums(number, organization, source) \
        VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
            organization = excluded.organization",
        autnum.get("asn"), autnum.get("org"), source_key,
    )
593
def _parse_inetnum_block(self, block, source_key, validcountries = None):
    """
    Parse an "inetnum"/"inet6num" WHOIS block and stage its networks in
    the _rirdata table.

    block is the list of lines of the object, source_key names the
    source it came from. validcountries is an optional collection of
    known country codes; if it is given (and not empty), objects that
    carry any other country code are skipped.

    The first country code of the object becomes the network's
    country; the complete (distinct) list is stored as
    original_countries.
    """
    log.debug("Parsing inetnum block:")

    inetnum = {}
    for line in block:
        log.debug(line)

        # Split line
        key, val = split_line(line)

        # Filter any inetnum records which are only referring to IP space
        # not managed by that specific RIR...
        if key == "netname":
            if re.match(r"(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK)", val.strip()):
                log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
                return

        if key == "inetnum":
            start_address, delim, end_address = val.partition("-")

            # Strip any excess space
            start_address, end_address = start_address.rstrip(), end_address.strip()

            # Convert the range into a list of networks. Summarising raises
            # as well (reversed range, mixed address families), but only
            # once the generator is consumed, hence the list() in here.
            try:
                start_address = ipaddress.ip_address(start_address)
                end_address = ipaddress.ip_address(end_address)

                inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
            except (TypeError, ValueError):
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "inet6num":
            try:
                inetnum[key] = [ipaddress.ip_network(val, strict=False)]
            except ValueError:
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "country":
            val = val.upper()

            # Catch RIR data objects with more than one country code,
            # but keep this list distinct...
            countries = inetnum.setdefault(key, [])
            if val not in countries:
                countries.append(val)

    # Skip empty objects
    if not inetnum or not "country" in inetnum:
        return

    countries = inetnum["country"]

    # An object might have a country, but no network
    networks = inetnum.get("inet6num") or inetnum.get("inetnum") or []

    # Collect unknown country codes (nothing to check without a list of valid ones)
    if validcountries:
        invalidcountries = [country for country in countries if country not in validcountries]
    else:
        invalidcountries = []

    # Iterate through all networks enumerated from above, check them for plausibility and insert
    # them into the database, if _check_parsed_network() succeeded
    for single_network in networks:
        if not self._check_parsed_network(single_network):
            continue

        # Skip objects with unknown country codes. We only complain if there is
        # at least one otherwise valid network (and only once) to avoid log spam...
        if invalidcountries:
            log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
                (invalidcountries, countries, networks))
            break

        # Everything is fine here, run INSERT statement...
        self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
            VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
            "%s" % single_network, countries[0], countries, source_key,
        )
665
def _parse_org_block(self, block, source_key):
    """
    Parse an "organisation" WHOIS block and stage it in the
    _organizations table.

    The handle is stored in upper case, the name as it is.
    """
    org = {}
    for key, val in map(split_line, block):
        if key == "organisation":
            org["organisation"] = val.upper()

        elif key == "org-name":
            org["org-name"] = val

    # Nothing of interest was found
    if not org:
        return

    handle, name = org.get("organisation"), org.get("org-name")

    self.db.execute("INSERT INTO _organizations(handle, name, source) \
        VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
        UPDATE SET name = excluded.name",
        handle, name, source_key,
    )
686
687 def _parse_line(self, line, source_key, validcountries = None):
688 # Skip version line
689 if line.startswith("2"):
690 return
691
692 # Skip comments
693 if line.startswith("#"):
694 return
695
696 try:
697 registry, country_code, type, line = line.split("|", 3)
698 except:
699 log.warning("Could not parse line: %s" % line)
700 return
701
702 # Skip any lines that are for stats only or do not have a country
703 # code at all (avoids log spam below)
704 if not country_code or country_code == '*':
705 return
706
707 # Skip objects with unknown country codes
708 if validcountries and country_code not in validcountries:
709 log.warning("Skipping line with bogus country '%s': %s" % \
710 (country_code, line))
711 return
712
713 if type in ("ipv6", "ipv4"):
714 return self._parse_ip_line(country_code, type, line, source_key)
715
716 def _parse_ip_line(self, country, type, line, source_key):
717 try:
718 address, prefix, date, status, organization = line.split("|")
719 except ValueError:
720 organization = None
721
722 # Try parsing the line without organization
723 try:
724 address, prefix, date, status = line.split("|")
725 except ValueError:
726 log.warning("Unhandled line format: %s" % line)
727 return
728
729 # Skip anything that isn't properly assigned
730 if not status in ("assigned", "allocated"):
731 return
732
733 # Cast prefix into an integer
734 try:
735 prefix = int(prefix)
736 except:
737 log.warning("Invalid prefix: %s" % prefix)
738 return
739
740 # Fix prefix length for IPv4
741 if type == "ipv4":
742 prefix = 32 - int(math.log(prefix, 2))
743
744 # Try to parse the address
745 try:
746 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
747 except ValueError:
748 log.warning("Invalid IP address: %s" % address)
749 return
750
751 if not self._check_parsed_network(network):
752 return
753
754 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
755 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
756 UPDATE SET country = excluded.country",
757 "%s" % network, country, [country], source_key,
758 )
759
def handle_update_announcements(self, ns):
    """
    Import the routing table of a route server and clean it up.

    A server starting with "/" is treated as the path of a Bird
    control socket; anything else is contacted via telnet. Afterwards
    everything that should never be part of the database is purged.
    All of this happens in a single transaction.
    """
    server = ns.server[0]

    # Pick the importer depending on what kind of server we got
    fetch = (
        self._handle_update_announcements_from_bird
        if server.startswith("/")
        else self._handle_update_announcements_from_telnet
    )

    purge = """
        -- Delete default routes
        DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

        -- Delete anything that is not global unicast address space
        DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

        -- DELETE "current network" address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

        -- DELETE local loopback address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

        -- DELETE RFC 1918 address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

        -- DELETE test, benchmark and documentation address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

        -- DELETE CGNAT address space (RFC 6598)
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

        -- DELETE link local address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

        -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
        DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

        -- DELETE multicast and reserved address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

        -- Delete networks that are too small to be in the global routing table
        DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
        DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

        -- Delete any non-public or reserved ASNs
        DELETE FROM announcements WHERE NOT (
            (autnum >= 1 AND autnum <= 23455)
            OR
            (autnum >= 23457 AND autnum <= 64495)
            OR
            (autnum >= 131072 AND autnum <= 4199999999)
        );

        -- Delete everything that we have not seen for 14 days
        DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
    """

    with self.db.transaction():
        fetch(server)

        # Get rid of anything we never want in here
        self.db.execute(purge)
825
def _handle_update_announcements_from_bird(self, server):
    """
    Read the routing table from a Bird control socket (server is the
    path of the socket) and store every route in the announcements
    table.

    Lines that do not look like a route are skipped.
    """
    # Pre-compile the regular expression for faster searching
    # (a raw literal, as "\s" and "\[" are no valid string escapes)
    route = re.compile(rb"^\s(.+?)\s+.+?\[AS(.*?).\]$")

    log.info("Requesting routing table from Bird (%s)" % server)

    # Send command to list all routes
    for line in self._bird_cmd(server, "show route"):
        m = route.match(line)
        if not m:
            log.debug("Could not parse line: %s" % line.decode())
            continue

        # Fetch the extracted network and ASN
        network, autnum = m.groups()

        # Insert it into the database
        self.db.execute("INSERT INTO announcements(network, autnum) \
            VALUES(%s, %s) ON CONFLICT (network) DO \
            UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
            network.decode(), autnum.decode(),
        )
848
def _handle_update_announcements_from_telnet(self, server):
    """
    Read the IPv6 and IPv4 BGP tables from a route server via telnet
    and store every route in the announcements table.

    Returns 1 if the server did not send a console prompt, None
    otherwise.
    """
    # Pre-compile regular expression for routes
    # (a raw literal, as "\*", "\s", "\>" and "\d" are no valid string
    # escapes; "\r\n" is now interpreted by the re module instead)
    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

    with telnetlib.Telnet(server) as t:
        # Wait for console greeting
        greeting = t.read_until(b"> ", timeout=30)
        if not greeting:
            log.error("Could not get a console prompt")
            return 1

        # Disable pagination
        t.write(b"terminal length 0\n")

        # Wait for the prompt to return
        t.read_until(b"> ")

        # Fetch the routing tables
        for protocol in ("ipv6", "ipv4"):
            log.info("Requesting %s routing table" % protocol)

            # Request the full unicast routing table
            t.write(b"show bgp %s unicast\n" % protocol.encode())

            # Read entire header which ends with "Path"
            t.read_until(b"Path\r\n")

            while True:
                # Try reading a full entry
                # Those might be broken across multiple lines but ends with i
                line = t.read_until(b"i\r\n", timeout=5)

                # Nothing arrived before the timeout, so we are done
                if not line:
                    break

                # Try finding a route in here
                m = route.match(line)
                if m:
                    network, autnum = m.groups()

                    # Convert network to string
                    network = network.decode()

                    # Append /24 for IPv4 addresses
                    if not "/" in network and not ":" in network:
                        network = "%s/24" % network

                    # Convert AS number to integer
                    autnum = int(autnum)

                    log.info("Found announcement for %s by %s" % (network, autnum))

                    self.db.execute("INSERT INTO announcements(network, autnum) \
                        VALUES(%s, %s) ON CONFLICT (network) DO \
                        UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                        network, autnum,
                    )

            log.info("Finished reading the %s routing table" % protocol)
914
915 def _bird_cmd(self, socket_path, command):
916 # Connect to the socket
917 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
918 s.connect(socket_path)
919
920 # Allocate some buffer
921 buffer = b""
922
923 # Send the command
924 s.send(b"%s\n" % command.encode())
925
926 while True:
927 # Fill up the buffer
928 buffer += s.recv(4096)
929
930 while True:
931 # Search for the next newline
932 pos = buffer.find(b"\n")
933
934 # If we cannot find one, we go back and read more data
935 if pos <= 0:
936 break
937
938 # Cut after the newline character
939 pos += 1
940
941 # Split the line we want and keep the rest in buffer
942 line, buffer = buffer[:pos], buffer[pos:]
943
944 # Look for the end-of-output indicator
945 if line == b"0000 \n":
946 return
947
948 # Otherwise return the line
949 yield line
950
def handle_update_overrides(self, ns):
    """
    Replace all overrides with the content of the given files.

    Both override tables are emptied and then filled again from the
    "net" and "aut-num" blocks found in the files, all within a single
    transaction. Blocks of any other type are reported and ignored.
    """
    insert_network = """
        INSERT INTO network_overrides(
            network,
            country,
            is_anonymous_proxy,
            is_satellite_provider,
            is_anycast
        ) VALUES (%s, %s, %s, %s, %s)
        ON CONFLICT (network) DO NOTHING"""

    insert_autnum = """
        INSERT INTO autnum_overrides(
            number,
            name,
            country,
            is_anonymous_proxy,
            is_satellite_provider,
            is_anycast
        ) VALUES(%s, %s, %s, %s, %s, %s)
        ON CONFLICT DO NOTHING"""

    with self.db.transaction():
        # Start from scratch
        self.db.execute("""
            TRUNCATE TABLE autnum_overrides;
            TRUNCATE TABLE network_overrides;
        """)

        for path in ns.files:
            log.info("Reading %s..." % path)

            with open(path, "rb") as f:
                for kind, block in location.importer.read_blocks(f):
                    if kind == "net":
                        network = block.get("net")

                        # Parse and normalise the network
                        try:
                            network = ipaddress.ip_network(network, strict=False)
                        except ValueError as e:
                            log.warning("Invalid IP network: %s: %s" % (network, e))
                            continue

                        # The default route would override everything
                        if network.prefixlen == 0:
                            log.warning("Skipping %s: You cannot overwrite default" % network)
                            continue

                        self.db.execute(insert_network,
                            "%s" % network,
                            block.get("country"),
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                        )

                    elif kind == "aut-num":
                        autnum = block.get("aut-num")

                        # AS numbers must be given as "AS<number>"
                        if not autnum.startswith("AS"):
                            log.warning("Invalid AS number: %s" % autnum)
                            continue

                        # Only the number is stored
                        autnum = autnum[2:]

                        self.db.execute(insert_autnum,
                            autnum,
                            block.get("name"),
                            block.get("country"),
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                        )

                    else:
                        log.warning("Unsupported type: %s" % kind)
1025
1026 @staticmethod
1027 def _parse_bool(block, key):
1028 val = block.get(key)
1029
1030 # There is no point to proceed when we got None
1031 if val is None:
1032 return
1033
1034 # Convert to lowercase
1035 val = val.lower()
1036
1037 # True
1038 if val in ("yes", "1"):
1039 return True
1040
1041 # False
1042 if val in ("no", "0"):
1043 return False
1044
1045 # Default to None
1046 return None
1047
def handle_import_countries(self, ns):
    """
    Replace the content of the countries table with the given files.

    Every line is expected to hold a country code, a continent code
    and the name of the country, separated by whitespace (the name may
    contain whitespace itself). Blank lines and lines starting with
    "#" are ignored; anything else that cannot be parsed is reported
    and skipped.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("TRUNCATE TABLE countries")

        for file in ns.file:
            for line in file:
                line = line.rstrip()

                # Ignore blank lines and any comments
                if not line or line.startswith("#"):
                    continue

                # Only a failed unpacking is expected here; anything else
                # is a real error that must not be swallowed
                try:
                    country_code, continent_code, name = line.split(maxsplit=2)
                except ValueError:
                    log.warning("Could not parse line: %s" % line)
                    continue

                self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
                    VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1069
1070
def split_line(line):
    """
    Split a "key: value" line at the first colon.

    Returns a (key, value) tuple with surrounding whitespace removed.
    The value is an empty string if the line has no colon.
    """
    head, _colon, tail = line.partition(":")

    return head.strip(), tail.strip()
1079
def main():
    """
    Entry point: create the command line interface and run it.
    """
    CLI().run()
1084
1085 main()