]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location-importer.in
Bump database version to "1"
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26 import telnetlib
27
28 # Load our location module
29 import location
30 import location.database
31 import location.importer
32 from location.i18n import _
33
34 # Initialise logging
35 log = logging.getLogger("location.importer")
36 log.propagate = 1
37
38 class CLI(object):
39 def parse_cli(self):
40 parser = argparse.ArgumentParser(
41 description=_("Location Importer Command Line Interface"),
42 )
43 subparsers = parser.add_subparsers()
44
45 # Global configuration flags
46 parser.add_argument("--debug", action="store_true",
47 help=_("Enable debug output"))
48 parser.add_argument("--quiet", action="store_true",
49 help=_("Enable quiet mode"))
50
51 # version
52 parser.add_argument("--version", action="version",
53 version="%(prog)s @VERSION@")
54
55 # Database
56 parser.add_argument("--database-host", required=True,
57 help=_("Database Hostname"), metavar=_("HOST"))
58 parser.add_argument("--database-name", required=True,
59 help=_("Database Name"), metavar=_("NAME"))
60 parser.add_argument("--database-username", required=True,
61 help=_("Database Username"), metavar=_("USERNAME"))
62 parser.add_argument("--database-password", required=True,
63 help=_("Database Password"), metavar=_("PASSWORD"))
64
65 # Write Database
66 write = subparsers.add_parser("write", help=_("Write database to file"))
67 write.set_defaults(func=self.handle_write)
68 write.add_argument("file", nargs=1, help=_("Database File"))
69 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
70 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
71 write.add_argument("--description", nargs="?", help=_("Sets a description"))
72 write.add_argument("--license", nargs="?", help=_("Sets the license"))
73 write.add_argument("--version", type=int, help=_("Database Format Version"))
74
75 # Update WHOIS
76 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
77 update_whois.set_defaults(func=self.handle_update_whois)
78
79 # Update announcements
80 update_announcements = subparsers.add_parser("update-announcements",
81 help=_("Update BGP Annoucements"))
82 update_announcements.set_defaults(func=self.handle_update_announcements)
83 update_announcements.add_argument("server", nargs=1,
84 help=_("Route Server to connect to"), metavar=_("SERVER"))
85
86 # Update overrides
87 update_overrides = subparsers.add_parser("update-overrides",
88 help=_("Update overrides"),
89 )
90 update_overrides.add_argument(
91 "files", nargs="+", help=_("Files to import"),
92 )
93 update_overrides.set_defaults(func=self.handle_update_overrides)
94
95 args = parser.parse_args()
96
97 # Configure logging
98 if args.debug:
99 location.logger.set_level(logging.DEBUG)
100 elif args.quiet:
101 location.logger.set_level(logging.WARNING)
102
103 # Print usage if no action was given
104 if not "func" in args:
105 parser.print_usage()
106 sys.exit(2)
107
108 return args
109
110 def run(self):
111 # Parse command line arguments
112 args = self.parse_cli()
113
114 # Initialise database
115 self.db = self._setup_database(args)
116
117 # Call function
118 ret = args.func(args)
119
120 # Return with exit code
121 if ret:
122 sys.exit(ret)
123
124 # Otherwise just exit
125 sys.exit(0)
126
127 def _setup_database(self, ns):
128 """
129 Initialise the database
130 """
131 # Connect to database
132 db = location.database.Connection(
133 host=ns.database_host, database=ns.database_name,
134 user=ns.database_username, password=ns.database_password,
135 )
136
137 with db.transaction():
138 db.execute("""
139 -- announcements
140 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
141 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
142 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
143 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
144 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
145
146 -- autnums
147 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
148 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
149
150 -- networks
151 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
152 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
153 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
154
155 -- overrides
156 CREATE TABLE IF NOT EXISTS autnum_overrides(
157 number bigint NOT NULL,
158 name text,
159 country text,
160 is_anonymous_proxy boolean DEFAULT FALSE,
161 is_satellite_provider boolean DEFAULT FALSE,
162 is_anycast boolean DEFAULT FALSE
163 );
164 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
165 ON autnum_overrides(number);
166
167 CREATE TABLE IF NOT EXISTS network_overrides(
168 network inet NOT NULL,
169 country text,
170 is_anonymous_proxy boolean DEFAULT FALSE,
171 is_satellite_provider boolean DEFAULT FALSE,
172 is_anycast boolean DEFAULT FALSE
173 );
174 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
175 ON network_overrides(network);
176 """)
177
178 return db
179
180 def handle_write(self, ns):
181 """
182 Compiles a database in libloc format out of what is in the database
183 """
184 # Allocate a writer
185 writer = location.Writer(ns.signing_key)
186
187 # Set all metadata
188 if ns.vendor:
189 writer.vendor = ns.vendor
190
191 if ns.description:
192 writer.description = ns.description
193
194 if ns.license:
195 writer.license = ns.license
196
197 # Add all Autonomous Systems
198 log.info("Writing Autonomous Systems...")
199
200 # Select all ASes with a name
201 rows = self.db.query("""
202 SELECT
203 autnums.number AS number,
204 COALESCE(
205 (SELECT overrides.name FROM autnum_overrides overrides
206 WHERE overrides.number = autnums.number),
207 autnums.name
208 ) AS name
209 FROM autnums
210 WHERE name <> %s ORDER BY number
211 """, "")
212
213 for row in rows:
214 a = writer.add_as(row.number)
215 a.name = row.name
216
217 # Add all networks
218 log.info("Writing networks...")
219
220 # Select all known networks
221 rows = self.db.query("""
222 SELECT
223 DISTINCT ON (announcements.network)
224 announcements.network AS network,
225 announcements.autnum AS autnum,
226
227 -- Country
228 COALESCE(
229 (
230 SELECT country FROM network_overrides overrides
231 WHERE announcements.network <<= overrides.network
232 ORDER BY masklen(overrides.network) DESC
233 LIMIT 1
234 ),
235 (
236 SELECT country FROM autnum_overrides overrides
237 WHERE announcements.autnum = overrides.number
238 ),
239 networks.country
240 ) AS country,
241
242 -- Must be part of returned values for ORDER BY clause
243 masklen(networks.network) AS sort,
244
245 -- Flags
246 COALESCE(
247 (
248 SELECT is_anonymous_proxy FROM network_overrides overrides
249 WHERE announcements.network <<= overrides.network
250 ORDER BY masklen(overrides.network) DESC
251 LIMIT 1
252 ),
253 (
254 SELECT is_anonymous_proxy FROM autnum_overrides overrides
255 WHERE announcements.autnum = overrides.number
256 )
257 ) AS is_anonymous_proxy,
258 COALESCE(
259 (
260 SELECT is_satellite_provider FROM network_overrides overrides
261 WHERE announcements.network <<= overrides.network
262 ORDER BY masklen(overrides.network) DESC
263 LIMIT 1
264 ),
265 (
266 SELECT is_satellite_provider FROM autnum_overrides overrides
267 WHERE announcements.autnum = overrides.number
268 )
269 ) AS is_satellite_provider,
270 COALESCE(
271 (
272 SELECT is_anycast FROM network_overrides overrides
273 WHERE announcements.network <<= overrides.network
274 ORDER BY masklen(overrides.network) DESC
275 LIMIT 1
276 ),
277 (
278 SELECT is_anycast FROM autnum_overrides overrides
279 WHERE announcements.autnum = overrides.number
280 )
281 ) AS is_anycast
282 FROM announcements
283 LEFT JOIN networks ON announcements.network <<= networks.network
284 ORDER BY announcements.network, sort DESC
285 """)
286
287 for row in rows:
288 network = writer.add_network(row.network)
289
290 # Save AS & country
291 network.asn, network.country_code = row.autnum, row.country
292
293 # Set flags
294 if row.is_anonymous_proxy:
295 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
296
297 if row.is_satellite_provider:
298 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
299
300 if row.is_anycast:
301 network.set_flag(location.NETWORK_FLAG_ANYCAST)
302
303 # Write everything to file
304 log.info("Writing database to file...")
305 for file in ns.file:
306 writer.write(file)
307
308 def handle_update_whois(self, ns):
309 downloader = location.importer.Downloader()
310
311 # Download all sources
312 with self.db.transaction():
313 # Create some temporary tables to store parsed data
314 self.db.execute("""
315 CREATE TEMPORARY TABLE _autnums(number integer, organization text)
316 ON COMMIT DROP;
317 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
318
319 CREATE TEMPORARY TABLE _organizations(handle text, name text)
320 ON COMMIT DROP;
321 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
322 """)
323
324 for source in location.importer.WHOIS_SOURCES:
325 with downloader.request(source, return_blocks=True) as f:
326 for block in f:
327 self._parse_block(block)
328
329 self.db.execute("""
330 INSERT INTO autnums(number, name)
331 SELECT _autnums.number, _organizations.name FROM _autnums
332 LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
333 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
334 """)
335
336 # Download all extended sources
337 for source in location.importer.EXTENDED_SOURCES:
338 with self.db.transaction():
339 # Download data
340 with downloader.request(source) as f:
341 for line in f:
342 self._parse_line(line)
343
344 def _parse_block(self, block):
345 # Get first line to find out what type of block this is
346 line = block[0]
347
348 # aut-num
349 if line.startswith("aut-num:"):
350 return self._parse_autnum_block(block)
351
352 # organisation
353 elif line.startswith("organisation:"):
354 return self._parse_org_block(block)
355
356 def _parse_autnum_block(self, block):
357 autnum = {}
358 for line in block:
359 # Split line
360 key, val = split_line(line)
361
362 if key == "aut-num":
363 m = re.match(r"^(AS|as)(\d+)", val)
364 if m:
365 autnum["asn"] = m.group(2)
366
367 elif key == "org":
368 autnum[key] = val
369
370 # Skip empty objects
371 if not autnum:
372 return
373
374 # Insert into database
375 self.db.execute("INSERT INTO _autnums(number, organization) \
376 VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
377 organization = excluded.organization",
378 autnum.get("asn"), autnum.get("org"),
379 )
380
381 def _parse_org_block(self, block):
382 org = {}
383 for line in block:
384 # Split line
385 key, val = split_line(line)
386
387 if key in ("organisation", "org-name"):
388 org[key] = val
389
390 # Skip empty objects
391 if not org:
392 return
393
394 self.db.execute("INSERT INTO _organizations(handle, name) \
395 VALUES(%s, %s) ON CONFLICT (handle) DO \
396 UPDATE SET name = excluded.name",
397 org.get("organisation"), org.get("org-name"),
398 )
399
400 def _parse_line(self, line):
401 # Skip version line
402 if line.startswith("2"):
403 return
404
405 # Skip comments
406 if line.startswith("#"):
407 return
408
409 try:
410 registry, country_code, type, line = line.split("|", 3)
411 except:
412 log.warning("Could not parse line: %s" % line)
413 return
414
415 # Skip any lines that are for stats only
416 if country_code == "*":
417 return
418
419 if type in ("ipv6", "ipv4"):
420 return self._parse_ip_line(country_code, type, line)
421
422 def _parse_ip_line(self, country, type, line):
423 try:
424 address, prefix, date, status, organization = line.split("|")
425 except ValueError:
426 organization = None
427
428 # Try parsing the line without organization
429 try:
430 address, prefix, date, status = line.split("|")
431 except ValueError:
432 log.warning("Unhandled line format: %s" % line)
433 return
434
435 # Skip anything that isn't properly assigned
436 if not status in ("assigned", "allocated"):
437 return
438
439 # Cast prefix into an integer
440 try:
441 prefix = int(prefix)
442 except:
443 log.warning("Invalid prefix: %s" % prefix)
444 return
445
446 # Fix prefix length for IPv4
447 if type == "ipv4":
448 prefix = 32 - int(math.log(prefix, 2))
449
450 # Try to parse the address
451 try:
452 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
453 except ValueError:
454 log.warning("Invalid IP address: %s" % address)
455 return
456
457 self.db.execute("INSERT INTO networks(network, country) \
458 VALUES(%s, %s) ON CONFLICT (network) DO \
459 UPDATE SET country = excluded.country",
460 "%s" % network, country,
461 )
462
463 def handle_update_announcements(self, ns):
464 server = ns.server[0]
465
466 # Pre-compile regular expression for routes
467 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
468
469 with telnetlib.Telnet(server) as t:
470 # Enable debug mode
471 #if ns.debug:
472 # t.set_debuglevel(10)
473
474 # Wait for console greeting
475 greeting = t.read_until(b"> ", timeout=30)
476 if not greeting:
477 log.error("Could not get a console prompt")
478 return 1
479
480 # Disable pagination
481 t.write(b"terminal length 0\n")
482
483 # Wait for the prompt to return
484 t.read_until(b"> ")
485
486 # Fetch the routing tables
487 with self.db.transaction():
488 for protocol in ("ipv6", "ipv4"):
489 log.info("Requesting %s routing table" % protocol)
490
491 # Request the full unicast routing table
492 t.write(b"show bgp %s unicast\n" % protocol.encode())
493
494 # Read entire header which ends with "Path"
495 t.read_until(b"Path\r\n")
496
497 while True:
498 # Try reading a full entry
499 # Those might be broken across multiple lines but ends with i
500 line = t.read_until(b"i\r\n", timeout=5)
501 if not line:
502 break
503
504 # Show line for debugging
505 #log.debug(repr(line))
506
507 # Try finding a route in here
508 m = route.match(line)
509 if m:
510 network, autnum = m.groups()
511
512 # Convert network to string
513 network = network.decode()
514
515 # Append /24 for IPv4 addresses
516 if not "/" in network and not ":" in network:
517 network = "%s/24" % network
518
519 # Convert AS number to integer
520 autnum = int(autnum)
521
522 log.info("Found announcement for %s by %s" % (network, autnum))
523
524 self.db.execute("INSERT INTO announcements(network, autnum) \
525 VALUES(%s, %s) ON CONFLICT (network) DO \
526 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
527 network, autnum,
528 )
529
530 log.info("Finished reading the %s routing table" % protocol)
531
532 # Purge anything we never want here
533 self.db.execute("""
534 -- Delete default routes
535 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
536
537 -- Delete anything that is not global unicast address space
538 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
539
540 -- DELETE "current network" address space
541 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
542
543 -- DELETE local loopback address space
544 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
545
546 -- DELETE RFC 1918 address space
547 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
548 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
549 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
550
551 -- DELETE test, benchmark and documentation address space
552 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
553 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
554 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
555 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
556 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
557
558 -- DELETE CGNAT address space (RFC 6598)
559 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
560
561 -- DELETE link local address space
562 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
563
564 -- DELETE IPv6 to IPv4 (6to4) address space
565 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
566
567 -- DELETE multicast and reserved address space
568 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
569 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
570
571 -- Delete networks that are too small to be in the global routing table
572 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
573 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
574
575 -- Delete any non-public or reserved ASNs
576 DELETE FROM announcements WHERE NOT (
577 (autnum >= 1 AND autnum <= 23455)
578 OR
579 (autnum >= 23457 AND autnum <= 64495)
580 OR
581 (autnum >= 131072 AND autnum <= 4199999999)
582 );
583
584 -- Delete everything that we have not seen for 14 days
585 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
586 """)
587
588 def handle_update_overrides(self, ns):
589 with self.db.transaction():
590 # Drop all data that we have
591 self.db.execute("""
592 TRUNCATE TABLE autnum_overrides;
593 TRUNCATE TABLE network_overrides;
594 """)
595
596 for file in ns.files:
597 log.info("Reading %s..." % file)
598
599 with open(file, "rb") as f:
600 for type, block in location.importer.read_blocks(f):
601 if type == "net":
602 network = block.get("net")
603 # Try to parse and normalise the network
604 try:
605 network = ipaddress.ip_network(network, strict=False)
606 except ValueError as e:
607 log.warning("Invalid IP network: %s: %s" % (network, e))
608 continue
609
610 # Prevent that we overwrite all networks
611 if network.prefixlen == 0:
612 log.warning("Skipping %s: You cannot overwrite default" % network)
613 continue
614
615 self.db.execute("""
616 INSERT INTO network_overrides(
617 network,
618 country,
619 is_anonymous_proxy,
620 is_satellite_provider,
621 is_anycast
622 ) VALUES (%s, %s, %s, %s, %s)
623 ON CONFLICT (network) DO NOTHING""",
624 "%s" % network,
625 block.get("country"),
626 block.get("is-anonymous-proxy") == "yes",
627 block.get("is-satellite-provider") == "yes",
628 block.get("is-anycast") == "yes",
629 )
630
631 elif type == "aut-num":
632 autnum = block.get("aut-num")
633
634 # Check if AS number begins with "AS"
635 if not autnum.startswith("AS"):
636 log.warning("Invalid AS number: %s" % autnum)
637 continue
638
639 # Strip "AS"
640 autnum = autnum[2:]
641
642 self.db.execute("""
643 INSERT INTO autnum_overrides(
644 number,
645 name,
646 country,
647 is_anonymous_proxy,
648 is_satellite_provider,
649 is_anycast
650 ) VALUES(%s, %s, %s, %s, %s, %s)
651 ON CONFLICT DO NOTHING""",
652 autnum,
653 block.get("name"),
654 block.get("country"),
655 block.get("is-anonymous-proxy") == "yes",
656 block.get("is-satellite-provider") == "yes",
657 block.get("is-anycast") == "yes",
658 )
659
660 else:
661 log.warning("Unsupport type: %s" % type)
662
663
664 def split_line(line):
665 key, colon, val = line.partition(":")
666
667 # Strip any excess space
668 key = key.strip()
669 val = val.strip()
670
671 return key, val
672
673 def main():
674 # Run the command line interface
675 c = CLI()
676 c.run()
677
678 main()