63df5e82969515e8c248078acd54d556babe35be
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26 import telnetlib
27
28 # Load our location module
29 import location
30 import location.database
31 import location.importer
32 from location.i18n import _
33
34 # Initialise logging
35 log = logging.getLogger("location.importer")
36 log.propagate = 1
37
38 class CLI(object):
39 def parse_cli(self):
40 parser = argparse.ArgumentParser(
41 description=_("Location Importer Command Line Interface"),
42 )
43 subparsers = parser.add_subparsers()
44
45 # Global configuration flags
46 parser.add_argument("--debug", action="store_true",
47 help=_("Enable debug output"))
48 parser.add_argument("--quiet", action="store_true",
49 help=_("Enable quiet mode"))
50
51 # version
52 parser.add_argument("--version", action="version",
53 version="%(prog)s @VERSION@")
54
55 # Database
56 parser.add_argument("--database-host", required=True,
57 help=_("Database Hostname"), metavar=_("HOST"))
58 parser.add_argument("--database-name", required=True,
59 help=_("Database Name"), metavar=_("NAME"))
60 parser.add_argument("--database-username", required=True,
61 help=_("Database Username"), metavar=_("USERNAME"))
62 parser.add_argument("--database-password", required=True,
63 help=_("Database Password"), metavar=_("PASSWORD"))
64
65 # Write Database
66 write = subparsers.add_parser("write", help=_("Write database to file"))
67 write.set_defaults(func=self.handle_write)
68 write.add_argument("file", nargs=1, help=_("Database File"))
69 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
70 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
71 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
72 write.add_argument("--description", nargs="?", help=_("Sets a description"))
73 write.add_argument("--license", nargs="?", help=_("Sets the license"))
74 write.add_argument("--version", type=int, help=_("Database Format Version"))
75
76 # Update WHOIS
77 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
78 update_whois.set_defaults(func=self.handle_update_whois)
79
80 # Update announcements
81 update_announcements = subparsers.add_parser("update-announcements",
82 help=_("Update BGP Annoucements"))
83 update_announcements.set_defaults(func=self.handle_update_announcements)
84 update_announcements.add_argument("server", nargs=1,
85 help=_("Route Server to connect to"), metavar=_("SERVER"))
86
87 # Update overrides
88 update_overrides = subparsers.add_parser("update-overrides",
89 help=_("Update overrides"),
90 )
91 update_overrides.add_argument(
92 "files", nargs="+", help=_("Files to import"),
93 )
94 update_overrides.set_defaults(func=self.handle_update_overrides)
95
96 # Import countries
97 import_countries = subparsers.add_parser("import-countries",
98 help=_("Import countries"),
99 )
100 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
101 help=_("File to import"))
102 import_countries.set_defaults(func=self.handle_import_countries)
103
104 args = parser.parse_args()
105
106 # Configure logging
107 if args.debug:
108 location.logger.set_level(logging.DEBUG)
109 elif args.quiet:
110 location.logger.set_level(logging.WARNING)
111
112 # Print usage if no action was given
113 if not "func" in args:
114 parser.print_usage()
115 sys.exit(2)
116
117 return args
118
119 def run(self):
120 # Parse command line arguments
121 args = self.parse_cli()
122
123 # Initialise database
124 self.db = self._setup_database(args)
125
126 # Call function
127 ret = args.func(args)
128
129 # Return with exit code
130 if ret:
131 sys.exit(ret)
132
133 # Otherwise just exit
134 sys.exit(0)
135
136 def _setup_database(self, ns):
137 """
138 Initialise the database
139 """
140 # Connect to database
141 db = location.database.Connection(
142 host=ns.database_host, database=ns.database_name,
143 user=ns.database_username, password=ns.database_password,
144 )
145
146 with db.transaction():
147 db.execute("""
148 -- announcements
149 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
150 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
151 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
152 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
153 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
154
155 -- autnums
156 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
157 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
158
159 -- countries
160 CREATE TABLE IF NOT EXISTS countries(
161 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
162 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
163
164 -- networks
165 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
166 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
167 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
168
169 -- overrides
170 CREATE TABLE IF NOT EXISTS autnum_overrides(
171 number bigint NOT NULL,
172 name text,
173 country text,
174 is_anonymous_proxy boolean,
175 is_satellite_provider boolean,
176 is_anycast boolean
177 );
178 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
179 ON autnum_overrides(number);
180
181 CREATE TABLE IF NOT EXISTS network_overrides(
182 network inet NOT NULL,
183 country text,
184 is_anonymous_proxy boolean,
185 is_satellite_provider boolean,
186 is_anycast boolean
187 );
188 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
189 ON network_overrides(network);
190 """)
191
192 return db
193
194 def handle_write(self, ns):
195 """
196 Compiles a database in libloc format out of what is in the database
197 """
198 # Allocate a writer
199 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
200
201 # Set all metadata
202 if ns.vendor:
203 writer.vendor = ns.vendor
204
205 if ns.description:
206 writer.description = ns.description
207
208 if ns.license:
209 writer.license = ns.license
210
211 # Add all Autonomous Systems
212 log.info("Writing Autonomous Systems...")
213
214 # Select all ASes with a name
215 rows = self.db.query("""
216 SELECT
217 autnums.number AS number,
218 COALESCE(
219 (SELECT overrides.name FROM autnum_overrides overrides
220 WHERE overrides.number = autnums.number),
221 autnums.name
222 ) AS name
223 FROM autnums
224 WHERE name <> %s ORDER BY number
225 """, "")
226
227 for row in rows:
228 a = writer.add_as(row.number)
229 a.name = row.name
230
231 # Add all networks
232 log.info("Writing networks...")
233
234 # Select all known networks
235 rows = self.db.query("""
236 SELECT
237 DISTINCT ON (announcements.network)
238 announcements.network AS network,
239 announcements.autnum AS autnum,
240
241 -- Country
242 COALESCE(
243 (
244 SELECT country FROM network_overrides overrides
245 WHERE announcements.network <<= overrides.network
246 ORDER BY masklen(overrides.network) DESC
247 LIMIT 1
248 ),
249 (
250 SELECT country FROM autnum_overrides overrides
251 WHERE announcements.autnum = overrides.number
252 ),
253 networks.country
254 ) AS country,
255
256 -- Must be part of returned values for ORDER BY clause
257 masklen(networks.network) AS sort,
258
259 -- Flags
260 COALESCE(
261 (
262 SELECT is_anonymous_proxy FROM network_overrides overrides
263 WHERE announcements.network <<= overrides.network
264 ORDER BY masklen(overrides.network) DESC
265 LIMIT 1
266 ),
267 (
268 SELECT is_anonymous_proxy FROM autnum_overrides overrides
269 WHERE announcements.autnum = overrides.number
270 ),
271 FALSE
272 ) AS is_anonymous_proxy,
273 COALESCE(
274 (
275 SELECT is_satellite_provider FROM network_overrides overrides
276 WHERE announcements.network <<= overrides.network
277 ORDER BY masklen(overrides.network) DESC
278 LIMIT 1
279 ),
280 (
281 SELECT is_satellite_provider FROM autnum_overrides overrides
282 WHERE announcements.autnum = overrides.number
283 ),
284 FALSE
285 ) AS is_satellite_provider,
286 COALESCE(
287 (
288 SELECT is_anycast FROM network_overrides overrides
289 WHERE announcements.network <<= overrides.network
290 ORDER BY masklen(overrides.network) DESC
291 LIMIT 1
292 ),
293 (
294 SELECT is_anycast FROM autnum_overrides overrides
295 WHERE announcements.autnum = overrides.number
296 ),
297 FALSE
298 ) AS is_anycast
299 FROM announcements
300 LEFT JOIN networks ON announcements.network <<= networks.network
301 ORDER BY announcements.network, sort DESC
302 """)
303
304 for row in rows:
305 network = writer.add_network(row.network)
306
307 # Save AS & country
308 network.asn, network.country_code = row.autnum, row.country
309
310 # Set flags
311 if row.is_anonymous_proxy:
312 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
313
314 if row.is_satellite_provider:
315 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
316
317 if row.is_anycast:
318 network.set_flag(location.NETWORK_FLAG_ANYCAST)
319
320 # Add all countries
321 log.info("Writing countries...")
322 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
323
324 for row in rows:
325 c = writer.add_country(row.country_code)
326 c.continent_code = row.continent_code
327 c.name = row.name
328
329 # Write everything to file
330 log.info("Writing database to file...")
331 for file in ns.file:
332 writer.write(file)
333
334 def handle_update_whois(self, ns):
335 downloader = location.importer.Downloader()
336
337 # Download all sources
338 with self.db.transaction():
339 # Create some temporary tables to store parsed data
340 self.db.execute("""
341 CREATE TEMPORARY TABLE _autnums(number integer, organization text)
342 ON COMMIT DROP;
343 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
344
345 CREATE TEMPORARY TABLE _organizations(handle text, name text)
346 ON COMMIT DROP;
347 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
348 """)
349
350 for source in location.importer.WHOIS_SOURCES:
351 with downloader.request(source, return_blocks=True) as f:
352 for block in f:
353 self._parse_block(block)
354
355 self.db.execute("""
356 INSERT INTO autnums(number, name)
357 SELECT _autnums.number, _organizations.name FROM _autnums
358 LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
359 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
360 """)
361
362 # Download all extended sources
363 for source in location.importer.EXTENDED_SOURCES:
364 with self.db.transaction():
365 # Download data
366 with downloader.request(source) as f:
367 for line in f:
368 self._parse_line(line)
369
370 def _parse_block(self, block):
371 # Get first line to find out what type of block this is
372 line = block[0]
373
374 # aut-num
375 if line.startswith("aut-num:"):
376 return self._parse_autnum_block(block)
377
378 # organisation
379 elif line.startswith("organisation:"):
380 return self._parse_org_block(block)
381
382 def _parse_autnum_block(self, block):
383 autnum = {}
384 for line in block:
385 # Split line
386 key, val = split_line(line)
387
388 if key == "aut-num":
389 m = re.match(r"^(AS|as)(\d+)", val)
390 if m:
391 autnum["asn"] = m.group(2)
392
393 elif key == "org":
394 autnum[key] = val
395
396 # Skip empty objects
397 if not autnum:
398 return
399
400 # Insert into database
401 self.db.execute("INSERT INTO _autnums(number, organization) \
402 VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
403 organization = excluded.organization",
404 autnum.get("asn"), autnum.get("org"),
405 )
406
407 def _parse_org_block(self, block):
408 org = {}
409 for line in block:
410 # Split line
411 key, val = split_line(line)
412
413 if key in ("organisation", "org-name"):
414 org[key] = val
415
416 # Skip empty objects
417 if not org:
418 return
419
420 self.db.execute("INSERT INTO _organizations(handle, name) \
421 VALUES(%s, %s) ON CONFLICT (handle) DO \
422 UPDATE SET name = excluded.name",
423 org.get("organisation"), org.get("org-name"),
424 )
425
426 def _parse_line(self, line):
427 # Skip version line
428 if line.startswith("2"):
429 return
430
431 # Skip comments
432 if line.startswith("#"):
433 return
434
435 try:
436 registry, country_code, type, line = line.split("|", 3)
437 except:
438 log.warning("Could not parse line: %s" % line)
439 return
440
441 # Skip any lines that are for stats only
442 if country_code == "*":
443 return
444
445 if type in ("ipv6", "ipv4"):
446 return self._parse_ip_line(country_code, type, line)
447
448 def _parse_ip_line(self, country, type, line):
449 try:
450 address, prefix, date, status, organization = line.split("|")
451 except ValueError:
452 organization = None
453
454 # Try parsing the line without organization
455 try:
456 address, prefix, date, status = line.split("|")
457 except ValueError:
458 log.warning("Unhandled line format: %s" % line)
459 return
460
461 # Skip anything that isn't properly assigned
462 if not status in ("assigned", "allocated"):
463 return
464
465 # Cast prefix into an integer
466 try:
467 prefix = int(prefix)
468 except:
469 log.warning("Invalid prefix: %s" % prefix)
470 return
471
472 # Fix prefix length for IPv4
473 if type == "ipv4":
474 prefix = 32 - int(math.log(prefix, 2))
475
476 # Try to parse the address
477 try:
478 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
479 except ValueError:
480 log.warning("Invalid IP address: %s" % address)
481 return
482
483 self.db.execute("INSERT INTO networks(network, country) \
484 VALUES(%s, %s) ON CONFLICT (network) DO \
485 UPDATE SET country = excluded.country",
486 "%s" % network, country,
487 )
488
489 def handle_update_announcements(self, ns):
490 server = ns.server[0]
491
492 # Pre-compile regular expression for routes
493 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
494
495 with telnetlib.Telnet(server) as t:
496 # Enable debug mode
497 #if ns.debug:
498 # t.set_debuglevel(10)
499
500 # Wait for console greeting
501 greeting = t.read_until(b"> ", timeout=30)
502 if not greeting:
503 log.error("Could not get a console prompt")
504 return 1
505
506 # Disable pagination
507 t.write(b"terminal length 0\n")
508
509 # Wait for the prompt to return
510 t.read_until(b"> ")
511
512 # Fetch the routing tables
513 with self.db.transaction():
514 for protocol in ("ipv6", "ipv4"):
515 log.info("Requesting %s routing table" % protocol)
516
517 # Request the full unicast routing table
518 t.write(b"show bgp %s unicast\n" % protocol.encode())
519
520 # Read entire header which ends with "Path"
521 t.read_until(b"Path\r\n")
522
523 while True:
524 # Try reading a full entry
525 # Those might be broken across multiple lines but ends with i
526 line = t.read_until(b"i\r\n", timeout=5)
527 if not line:
528 break
529
530 # Show line for debugging
531 #log.debug(repr(line))
532
533 # Try finding a route in here
534 m = route.match(line)
535 if m:
536 network, autnum = m.groups()
537
538 # Convert network to string
539 network = network.decode()
540
541 # Append /24 for IPv4 addresses
542 if not "/" in network and not ":" in network:
543 network = "%s/24" % network
544
545 # Convert AS number to integer
546 autnum = int(autnum)
547
548 log.info("Found announcement for %s by %s" % (network, autnum))
549
550 self.db.execute("INSERT INTO announcements(network, autnum) \
551 VALUES(%s, %s) ON CONFLICT (network) DO \
552 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
553 network, autnum,
554 )
555
556 log.info("Finished reading the %s routing table" % protocol)
557
558 # Purge anything we never want here
559 self.db.execute("""
560 -- Delete default routes
561 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
562
563 -- Delete anything that is not global unicast address space
564 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
565
566 -- DELETE "current network" address space
567 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
568
569 -- DELETE local loopback address space
570 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
571
572 -- DELETE RFC 1918 address space
573 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
574 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
575 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
576
577 -- DELETE test, benchmark and documentation address space
578 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
579 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
580 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
581 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
582 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
583
584 -- DELETE CGNAT address space (RFC 6598)
585 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
586
587 -- DELETE link local address space
588 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
589
590 -- DELETE IPv6 to IPv4 (6to4) address space
591 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
592
593 -- DELETE multicast and reserved address space
594 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
595 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
596
597 -- Delete networks that are too small to be in the global routing table
598 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
599 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
600
601 -- Delete any non-public or reserved ASNs
602 DELETE FROM announcements WHERE NOT (
603 (autnum >= 1 AND autnum <= 23455)
604 OR
605 (autnum >= 23457 AND autnum <= 64495)
606 OR
607 (autnum >= 131072 AND autnum <= 4199999999)
608 );
609
610 -- Delete everything that we have not seen for 14 days
611 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
612 """)
613
614 def handle_update_overrides(self, ns):
615 with self.db.transaction():
616 # Drop all data that we have
617 self.db.execute("""
618 TRUNCATE TABLE autnum_overrides;
619 TRUNCATE TABLE network_overrides;
620 """)
621
622 for file in ns.files:
623 log.info("Reading %s..." % file)
624
625 with open(file, "rb") as f:
626 for type, block in location.importer.read_blocks(f):
627 if type == "net":
628 network = block.get("net")
629 # Try to parse and normalise the network
630 try:
631 network = ipaddress.ip_network(network, strict=False)
632 except ValueError as e:
633 log.warning("Invalid IP network: %s: %s" % (network, e))
634 continue
635
636 # Prevent that we overwrite all networks
637 if network.prefixlen == 0:
638 log.warning("Skipping %s: You cannot overwrite default" % network)
639 continue
640
641 self.db.execute("""
642 INSERT INTO network_overrides(
643 network,
644 country,
645 is_anonymous_proxy,
646 is_satellite_provider,
647 is_anycast
648 ) VALUES (%s, %s, %s, %s, %s)
649 ON CONFLICT (network) DO NOTHING""",
650 "%s" % network,
651 block.get("country"),
652 self._parse_bool(block, "is-anonymous-proxy"),
653 self._parse_bool(block, "is-satellite-provider"),
654 self._parse_bool(block, "is-anycast"),
655 )
656
657 elif type == "aut-num":
658 autnum = block.get("aut-num")
659
660 # Check if AS number begins with "AS"
661 if not autnum.startswith("AS"):
662 log.warning("Invalid AS number: %s" % autnum)
663 continue
664
665 # Strip "AS"
666 autnum = autnum[2:]
667
668 self.db.execute("""
669 INSERT INTO autnum_overrides(
670 number,
671 name,
672 country,
673 is_anonymous_proxy,
674 is_satellite_provider,
675 is_anycast
676 ) VALUES(%s, %s, %s, %s, %s, %s)
677 ON CONFLICT DO NOTHING""",
678 autnum,
679 block.get("name"),
680 block.get("country"),
681 self._parse_bool(block, "is-anonymous-proxy"),
682 self._parse_bool(block, "is-satellite-provider"),
683 self._parse_bool(block, "is-anycast"),
684 )
685
686 else:
687 log.warning("Unsupport type: %s" % type)
688
689 @staticmethod
690 def _parse_bool(block, key):
691 val = block.get(key)
692
693 # There is no point to proceed when we got None
694 if val is None:
695 return
696
697 # Convert to lowercase
698 val = val.lower()
699
700 # True
701 if val in ("yes", "1"):
702 return True
703
704 # False
705 if val in ("no", "0"):
706 return False
707
708 # Default to None
709 return None
710
711 def handle_import_countries(self, ns):
712 with self.db.transaction():
713 # Drop all data that we have
714 self.db.execute("TRUNCATE TABLE countries")
715
716 for file in ns.file:
717 for line in file:
718 line = line.rstrip()
719
720 # Ignore any comments
721 if line.startswith("#"):
722 continue
723
724 try:
725 country_code, continent_code, name = line.split(maxsplit=2)
726 except:
727 log.warning("Could not parse line: %s" % line)
728 continue
729
730 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
731 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
732
733
734 def split_line(line):
735 key, colon, val = line.partition(":")
736
737 # Strip any excess space
738 key = key.strip()
739 val = val.strip()
740
741 return key, val
742
743 def main():
744 # Run the command line interface
745 c = CLI()
746 c.run()
747
748 main()