]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location-importer.in
importer: Drop EDROP as it has been merged into DROP
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26 import telnetlib
27
28 # Load our location module
29 import location
30 import location.database
31 import location.importer
32 from location.i18n import _
33
34 # Initialise logging
35 log = logging.getLogger("location.importer")
36 log.propagate = 1
37
38 class CLI(object):
39 def parse_cli(self):
40 parser = argparse.ArgumentParser(
41 description=_("Location Importer Command Line Interface"),
42 )
43 subparsers = parser.add_subparsers()
44
45 # Global configuration flags
46 parser.add_argument("--debug", action="store_true",
47 help=_("Enable debug output"))
48 parser.add_argument("--quiet", action="store_true",
49 help=_("Enable quiet mode"))
50
51 # version
52 parser.add_argument("--version", action="version",
53 version="%(prog)s @VERSION@")
54
55 # Database
56 parser.add_argument("--database-host", required=True,
57 help=_("Database Hostname"), metavar=_("HOST"))
58 parser.add_argument("--database-name", required=True,
59 help=_("Database Name"), metavar=_("NAME"))
60 parser.add_argument("--database-username", required=True,
61 help=_("Database Username"), metavar=_("USERNAME"))
62 parser.add_argument("--database-password", required=True,
63 help=_("Database Password"), metavar=_("PASSWORD"))
64
65 # Write Database
66 write = subparsers.add_parser("write", help=_("Write database to file"))
67 write.set_defaults(func=self.handle_write)
68 write.add_argument("file", nargs=1, help=_("Database File"))
69 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
70 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
71 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
72 write.add_argument("--description", nargs="?", help=_("Sets a description"))
73 write.add_argument("--license", nargs="?", help=_("Sets the license"))
74 write.add_argument("--version", type=int, help=_("Database Format Version"))
75
76 # Update WHOIS
77 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
78 update_whois.set_defaults(func=self.handle_update_whois)
79
80 # Update announcements
81 update_announcements = subparsers.add_parser("update-announcements",
82 help=_("Update BGP Annoucements"))
83 update_announcements.set_defaults(func=self.handle_update_announcements)
84 update_announcements.add_argument("server", nargs=1,
85 help=_("Route Server to connect to"), metavar=_("SERVER"))
86
87 # Update overrides
88 update_overrides = subparsers.add_parser("update-overrides",
89 help=_("Update overrides"),
90 )
91 update_overrides.add_argument(
92 "files", nargs="+", help=_("Files to import"),
93 )
94 update_overrides.set_defaults(func=self.handle_update_overrides)
95
96 # Import countries
97 import_countries = subparsers.add_parser("import-countries",
98 help=_("Import countries"),
99 )
100 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
101 help=_("File to import"))
102 import_countries.set_defaults(func=self.handle_import_countries)
103
104 args = parser.parse_args()
105
106 # Configure logging
107 if args.debug:
108 location.logger.set_level(logging.DEBUG)
109 elif args.quiet:
110 location.logger.set_level(logging.WARNING)
111
112 # Print usage if no action was given
113 if not "func" in args:
114 parser.print_usage()
115 sys.exit(2)
116
117 return args
118
119 def run(self):
120 # Parse command line arguments
121 args = self.parse_cli()
122
123 # Initialise database
124 self.db = self._setup_database(args)
125
126 # Call function
127 ret = args.func(args)
128
129 # Return with exit code
130 if ret:
131 sys.exit(ret)
132
133 # Otherwise just exit
134 sys.exit(0)
135
136 def _setup_database(self, ns):
137 """
138 Initialise the database
139 """
140 # Connect to database
141 db = location.database.Connection(
142 host=ns.database_host, database=ns.database_name,
143 user=ns.database_username, password=ns.database_password,
144 )
145
146 with db.transaction():
147 db.execute("""
148 -- announcements
149 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
150 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
151 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
152 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
153 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
154
155 -- autnums
156 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
157 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
158
159 -- countries
160 CREATE TABLE IF NOT EXISTS countries(
161 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
162 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
163
164 -- networks
165 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
166 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
167 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
168
169 -- overrides
170 CREATE TABLE IF NOT EXISTS autnum_overrides(
171 number bigint NOT NULL,
172 name text,
173 country text,
174 is_anonymous_proxy boolean,
175 is_satellite_provider boolean,
176 is_anycast boolean
177 );
178 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
179 ON autnum_overrides(number);
180
181 CREATE TABLE IF NOT EXISTS network_overrides(
182 network inet NOT NULL,
183 country text,
184 is_anonymous_proxy boolean,
185 is_satellite_provider boolean,
186 is_anycast boolean
187 );
188 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
189 ON network_overrides(network);
190 """)
191
192 return db
193
194 def handle_write(self, ns):
195 """
196 Compiles a database in libloc format out of what is in the database
197 """
198 # Allocate a writer
199 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
200
201 # Set all metadata
202 if ns.vendor:
203 writer.vendor = ns.vendor
204
205 if ns.description:
206 writer.description = ns.description
207
208 if ns.license:
209 writer.license = ns.license
210
211 # Add all Autonomous Systems
212 log.info("Writing Autonomous Systems...")
213
214 # Select all ASes with a name
215 rows = self.db.query("""
216 SELECT
217 autnums.number AS number,
218 COALESCE(
219 (SELECT overrides.name FROM autnum_overrides overrides
220 WHERE overrides.number = autnums.number),
221 autnums.name
222 ) AS name
223 FROM autnums
224 WHERE name <> %s ORDER BY number
225 """, "")
226
227 for row in rows:
228 a = writer.add_as(row.number)
229 a.name = row.name
230
231 # Add all networks
232 log.info("Writing networks...")
233
234 # Select all known networks
235 rows = self.db.query("""
236 -- Get a (sorted) list of all known networks
237 WITH known_networks AS (
238 SELECT network FROM announcements
239 UNION
240 SELECT network FROM networks
241 ORDER BY network
242 )
243
244 -- Return a list of those networks enriched with all
245 -- other information that we store in the database
246 SELECT
247 DISTINCT ON (known_networks.network)
248 known_networks.network AS network,
249 announcements.autnum AS autnum,
250
251 -- Country
252 COALESCE(
253 (
254 SELECT country FROM network_overrides overrides
255 WHERE announcements.network <<= overrides.network
256 ORDER BY masklen(overrides.network) DESC
257 LIMIT 1
258 ),
259 (
260 SELECT country FROM autnum_overrides overrides
261 WHERE announcements.autnum = overrides.number
262 ),
263 networks.country
264 ) AS country,
265
266 -- Flags
267 COALESCE(
268 (
269 SELECT is_anonymous_proxy FROM network_overrides overrides
270 WHERE announcements.network <<= overrides.network
271 ORDER BY masklen(overrides.network) DESC
272 LIMIT 1
273 ),
274 (
275 SELECT is_anonymous_proxy FROM autnum_overrides overrides
276 WHERE announcements.autnum = overrides.number
277 ),
278 FALSE
279 ) AS is_anonymous_proxy,
280 COALESCE(
281 (
282 SELECT is_satellite_provider FROM network_overrides overrides
283 WHERE announcements.network <<= overrides.network
284 ORDER BY masklen(overrides.network) DESC
285 LIMIT 1
286 ),
287 (
288 SELECT is_satellite_provider FROM autnum_overrides overrides
289 WHERE announcements.autnum = overrides.number
290 ),
291 FALSE
292 ) AS is_satellite_provider,
293 COALESCE(
294 (
295 SELECT is_anycast FROM network_overrides overrides
296 WHERE announcements.network <<= overrides.network
297 ORDER BY masklen(overrides.network) DESC
298 LIMIT 1
299 ),
300 (
301 SELECT is_anycast FROM autnum_overrides overrides
302 WHERE announcements.autnum = overrides.number
303 ),
304 FALSE
305 ) AS is_anycast,
306
307 -- Must be part of returned values for ORDER BY clause
308 masklen(known_networks.network) AS sort
309 FROM known_networks
310 LEFT JOIN announcements ON known_networks.network <<= announcements.network
311 LEFT JOIN networks ON known_networks.network <<= networks.network
312 ORDER BY known_networks.network, sort DESC
313 """)
314
315 for row in rows:
316 network = writer.add_network(row.network)
317
318 # Save country
319 if row.country:
320 network.country_code = row.country
321
322 # Save ASN
323 if row.autnum:
324 network.asn = row.autnum
325
326 # Set flags
327 if row.is_anonymous_proxy:
328 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
329
330 if row.is_satellite_provider:
331 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
332
333 if row.is_anycast:
334 network.set_flag(location.NETWORK_FLAG_ANYCAST)
335
336 # Add all countries
337 log.info("Writing countries...")
338 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
339
340 for row in rows:
341 c = writer.add_country(row.country_code)
342 c.continent_code = row.continent_code
343 c.name = row.name
344
345 # Write everything to file
346 log.info("Writing database to file...")
347 for file in ns.file:
348 writer.write(file)
349
350 def handle_update_whois(self, ns):
351 downloader = location.importer.Downloader()
352
353 # Download all sources
354 with self.db.transaction():
355 # Create some temporary tables to store parsed data
356 self.db.execute("""
357 CREATE TEMPORARY TABLE _autnums(number integer, organization text)
358 ON COMMIT DROP;
359 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
360
361 CREATE TEMPORARY TABLE _organizations(handle text, name text)
362 ON COMMIT DROP;
363 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
364 """)
365
366 for source in location.importer.WHOIS_SOURCES:
367 with downloader.request(source, return_blocks=True) as f:
368 for block in f:
369 self._parse_block(block)
370
371 self.db.execute("""
372 INSERT INTO autnums(number, name)
373 SELECT _autnums.number, _organizations.name FROM _autnums
374 LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
375 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
376 """)
377
378 # Download all extended sources
379 for source in location.importer.EXTENDED_SOURCES:
380 with self.db.transaction():
381 # Download data
382 with downloader.request(source) as f:
383 for line in f:
384 self._parse_line(line)
385
386 def _parse_block(self, block):
387 # Get first line to find out what type of block this is
388 line = block[0]
389
390 # aut-num
391 if line.startswith("aut-num:"):
392 return self._parse_autnum_block(block)
393
394 # organisation
395 elif line.startswith("organisation:"):
396 return self._parse_org_block(block)
397
398 def _parse_autnum_block(self, block):
399 autnum = {}
400 for line in block:
401 # Split line
402 key, val = split_line(line)
403
404 if key == "aut-num":
405 m = re.match(r"^(AS|as)(\d+)", val)
406 if m:
407 autnum["asn"] = m.group(2)
408
409 elif key == "org":
410 autnum[key] = val
411
412 # Skip empty objects
413 if not autnum:
414 return
415
416 # Insert into database
417 self.db.execute("INSERT INTO _autnums(number, organization) \
418 VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
419 organization = excluded.organization",
420 autnum.get("asn"), autnum.get("org"),
421 )
422
423 def _parse_org_block(self, block):
424 org = {}
425 for line in block:
426 # Split line
427 key, val = split_line(line)
428
429 if key in ("organisation", "org-name"):
430 org[key] = val
431
432 # Skip empty objects
433 if not org:
434 return
435
436 self.db.execute("INSERT INTO _organizations(handle, name) \
437 VALUES(%s, %s) ON CONFLICT (handle) DO \
438 UPDATE SET name = excluded.name",
439 org.get("organisation"), org.get("org-name"),
440 )
441
442 def _parse_line(self, line):
443 # Skip version line
444 if line.startswith("2"):
445 return
446
447 # Skip comments
448 if line.startswith("#"):
449 return
450
451 try:
452 registry, country_code, type, line = line.split("|", 3)
453 except:
454 log.warning("Could not parse line: %s" % line)
455 return
456
457 # Skip any lines that are for stats only
458 if country_code == "*":
459 return
460
461 if type in ("ipv6", "ipv4"):
462 return self._parse_ip_line(country_code, type, line)
463
464 def _parse_ip_line(self, country, type, line):
465 try:
466 address, prefix, date, status, organization = line.split("|")
467 except ValueError:
468 organization = None
469
470 # Try parsing the line without organization
471 try:
472 address, prefix, date, status = line.split("|")
473 except ValueError:
474 log.warning("Unhandled line format: %s" % line)
475 return
476
477 # Skip anything that isn't properly assigned
478 if not status in ("assigned", "allocated"):
479 return
480
481 # Cast prefix into an integer
482 try:
483 prefix = int(prefix)
484 except:
485 log.warning("Invalid prefix: %s" % prefix)
486 return
487
488 # Fix prefix length for IPv4
489 if type == "ipv4":
490 prefix = 32 - int(math.log(prefix, 2))
491
492 # Try to parse the address
493 try:
494 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
495 except ValueError:
496 log.warning("Invalid IP address: %s" % address)
497 return
498
499 self.db.execute("INSERT INTO networks(network, country) \
500 VALUES(%s, %s) ON CONFLICT (network) DO \
501 UPDATE SET country = excluded.country",
502 "%s" % network, country,
503 )
504
505 def handle_update_announcements(self, ns):
506 server = ns.server[0]
507
508 # Pre-compile regular expression for routes
509 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
510
511 with telnetlib.Telnet(server) as t:
512 # Enable debug mode
513 #if ns.debug:
514 # t.set_debuglevel(10)
515
516 # Wait for console greeting
517 greeting = t.read_until(b"> ", timeout=30)
518 if not greeting:
519 log.error("Could not get a console prompt")
520 return 1
521
522 # Disable pagination
523 t.write(b"terminal length 0\n")
524
525 # Wait for the prompt to return
526 t.read_until(b"> ")
527
528 # Fetch the routing tables
529 with self.db.transaction():
530 for protocol in ("ipv6", "ipv4"):
531 log.info("Requesting %s routing table" % protocol)
532
533 # Request the full unicast routing table
534 t.write(b"show bgp %s unicast\n" % protocol.encode())
535
536 # Read entire header which ends with "Path"
537 t.read_until(b"Path\r\n")
538
539 while True:
540 # Try reading a full entry
541 # Those might be broken across multiple lines but ends with i
542 line = t.read_until(b"i\r\n", timeout=5)
543 if not line:
544 break
545
546 # Show line for debugging
547 #log.debug(repr(line))
548
549 # Try finding a route in here
550 m = route.match(line)
551 if m:
552 network, autnum = m.groups()
553
554 # Convert network to string
555 network = network.decode()
556
557 # Append /24 for IPv4 addresses
558 if not "/" in network and not ":" in network:
559 network = "%s/24" % network
560
561 # Convert AS number to integer
562 autnum = int(autnum)
563
564 log.info("Found announcement for %s by %s" % (network, autnum))
565
566 self.db.execute("INSERT INTO announcements(network, autnum) \
567 VALUES(%s, %s) ON CONFLICT (network) DO \
568 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
569 network, autnum,
570 )
571
572 log.info("Finished reading the %s routing table" % protocol)
573
574 # Purge anything we never want here
575 self.db.execute("""
576 -- Delete default routes
577 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
578
579 -- Delete anything that is not global unicast address space
580 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
581
582 -- DELETE "current network" address space
583 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
584
585 -- DELETE local loopback address space
586 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
587
588 -- DELETE RFC 1918 address space
589 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
590 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
591 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
592
593 -- DELETE test, benchmark and documentation address space
594 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
595 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
596 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
597 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
598 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
599
600 -- DELETE CGNAT address space (RFC 6598)
601 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
602
603 -- DELETE link local address space
604 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
605
606 -- DELETE IPv6 to IPv4 (6to4) address space
607 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
608
609 -- DELETE multicast and reserved address space
610 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
611 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
612
613 -- Delete networks that are too small to be in the global routing table
614 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
615 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
616
617 -- Delete any non-public or reserved ASNs
618 DELETE FROM announcements WHERE NOT (
619 (autnum >= 1 AND autnum <= 23455)
620 OR
621 (autnum >= 23457 AND autnum <= 64495)
622 OR
623 (autnum >= 131072 AND autnum <= 4199999999)
624 );
625
626 -- Delete everything that we have not seen for 14 days
627 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
628 """)
629
630 def handle_update_overrides(self, ns):
631 with self.db.transaction():
632 # Drop all data that we have
633 self.db.execute("""
634 TRUNCATE TABLE autnum_overrides;
635 TRUNCATE TABLE network_overrides;
636 """)
637
638 for file in ns.files:
639 log.info("Reading %s..." % file)
640
641 with open(file, "rb") as f:
642 for type, block in location.importer.read_blocks(f):
643 if type == "net":
644 network = block.get("net")
645 # Try to parse and normalise the network
646 try:
647 network = ipaddress.ip_network(network, strict=False)
648 except ValueError as e:
649 log.warning("Invalid IP network: %s: %s" % (network, e))
650 continue
651
652 # Prevent that we overwrite all networks
653 if network.prefixlen == 0:
654 log.warning("Skipping %s: You cannot overwrite default" % network)
655 continue
656
657 self.db.execute("""
658 INSERT INTO network_overrides(
659 network,
660 country,
661 is_anonymous_proxy,
662 is_satellite_provider,
663 is_anycast
664 ) VALUES (%s, %s, %s, %s, %s)
665 ON CONFLICT (network) DO NOTHING""",
666 "%s" % network,
667 block.get("country"),
668 self._parse_bool(block, "is-anonymous-proxy"),
669 self._parse_bool(block, "is-satellite-provider"),
670 self._parse_bool(block, "is-anycast"),
671 )
672
673 elif type == "aut-num":
674 autnum = block.get("aut-num")
675
676 # Check if AS number begins with "AS"
677 if not autnum.startswith("AS"):
678 log.warning("Invalid AS number: %s" % autnum)
679 continue
680
681 # Strip "AS"
682 autnum = autnum[2:]
683
684 self.db.execute("""
685 INSERT INTO autnum_overrides(
686 number,
687 name,
688 country,
689 is_anonymous_proxy,
690 is_satellite_provider,
691 is_anycast
692 ) VALUES(%s, %s, %s, %s, %s, %s)
693 ON CONFLICT DO NOTHING""",
694 autnum,
695 block.get("name"),
696 block.get("country"),
697 self._parse_bool(block, "is-anonymous-proxy"),
698 self._parse_bool(block, "is-satellite-provider"),
699 self._parse_bool(block, "is-anycast"),
700 )
701
702 else:
703 log.warning("Unsupport type: %s" % type)
704
705 @staticmethod
706 def _parse_bool(block, key):
707 val = block.get(key)
708
709 # There is no point to proceed when we got None
710 if val is None:
711 return
712
713 # Convert to lowercase
714 val = val.lower()
715
716 # True
717 if val in ("yes", "1"):
718 return True
719
720 # False
721 if val in ("no", "0"):
722 return False
723
724 # Default to None
725 return None
726
727 def handle_import_countries(self, ns):
728 with self.db.transaction():
729 # Drop all data that we have
730 self.db.execute("TRUNCATE TABLE countries")
731
732 for file in ns.file:
733 for line in file:
734 line = line.rstrip()
735
736 # Ignore any comments
737 if line.startswith("#"):
738 continue
739
740 try:
741 country_code, continent_code, name = line.split(maxsplit=2)
742 except:
743 log.warning("Could not parse line: %s" % line)
744 continue
745
746 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
747 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
748
749
750 def split_line(line):
751 key, colon, val = line.partition(":")
752
753 # Strip any excess space
754 key = key.strip()
755 val = val.strip()
756
757 return key, val
758
759 def main():
760 # Run the command line interface
761 c = CLI()
762 c.run()
763
764 main()