]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location-importer.in
57eccd9360e6740e38b89bf392d3503e447d9b31
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26 import telnetlib
27
28 # Load our location module
29 import location
30 import location.database
31 import location.importer
32 from location.i18n import _
33
# Module-wide logger; its records are passed on to the handlers of its
# ancestor loggers.
log = logging.getLogger("location.importer")
log.propagate = True
37
38 class CLI(object):
def parse_cli(self):
    """
    Define the command line interface and parse the process arguments.

    Returns the parsed argparse namespace. Its "func" attribute holds the
    handler of the selected sub-command. When no sub-command was given,
    the usage is printed and the process exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description=_("Location Importer Command Line Interface"),
    )
    subparsers = parser.add_subparsers()

    # Flags that are accepted regardless of the sub-command
    parser.add_argument("--debug", action="store_true",
        help=_("Enable debug output"))
    parser.add_argument("--quiet", action="store_true",
        help=_("Enable quiet mode"))
    parser.add_argument("--version", action="version",
        version="%(prog)s @VERSION@")

    # Database credentials, every one of them is mandatory
    database_options = (
        ("--database-host", _("Database Hostname"), _("HOST")),
        ("--database-name", _("Database Name"), _("NAME")),
        ("--database-username", _("Database Username"), _("USERNAME")),
        ("--database-password", _("Database Password"), _("PASSWORD")),
    )
    for option, description, placeholder in database_options:
        parser.add_argument(option, required=True,
            help=description, metavar=placeholder)

    # write: export the database into a file
    write = subparsers.add_parser("write", help=_("Write database to file"))
    write.set_defaults(func=self.handle_write)
    write.add_argument("file", nargs=1, help=_("Database File"))
    write.add_argument("--signing-key", nargs="?", type=open,
        help=_("Signing Key"))
    metadata_options = (
        ("--vendor", _("Sets the vendor")),
        ("--description", _("Sets a description")),
        ("--license", _("Sets the license")),
    )
    for option, description in metadata_options:
        write.add_argument(option, nargs="?", help=description)

    # update-whois: takes no further arguments
    update_whois = subparsers.add_parser("update-whois",
        help=_("Update WHOIS Information"))
    update_whois.set_defaults(func=self.handle_update_whois)

    # update-announcements: needs the route server to talk to
    update_announcements = subparsers.add_parser("update-announcements",
        help=_("Update BGP Annoucements"))
    update_announcements.set_defaults(func=self.handle_update_announcements)
    update_announcements.add_argument("server", nargs=1,
        help=_("Route Server to connect to"), metavar=_("SERVER"))

    # update-overrides: one or more override files
    update_overrides = subparsers.add_parser("update-overrides",
        help=_("Update overrides"),
    )
    update_overrides.add_argument(
        "files", nargs="+", help=_("Files to import"),
    )
    update_overrides.set_defaults(func=self.handle_update_overrides)

    args = parser.parse_args()

    # --debug wins over --quiet when both are passed
    if args.debug:
        location.logger.set_level(logging.DEBUG)
    elif args.quiet:
        location.logger.set_level(logging.WARNING)

    # Without a sub-command there is no handler to call
    if "func" not in args:
        parser.print_usage()
        sys.exit(2)

    return args
108
def run(self):
    """
    Parse the command line, connect to the database and run the handler.

    This never returns: the process exits with whatever the handler
    returned, or with 0 when the handler returned nothing (or any other
    falsy value).
    """
    args = self.parse_cli()

    # Handlers reach the database through self.db
    self.db = self._setup_database(args)

    # A falsy result from the handler is reported as success
    sys.exit(args.func(args) or 0)
125
def _setup_database(self, ns):
    """
    Open the database connection and create any missing tables and indexes.

    ns is the parsed command line namespace; its database_host,
    database_name, database_username and database_password attributes
    are used to connect. Returns the connection object.
    """
    credentials = {
        "host": ns.database_host,
        "database": ns.database_name,
        "user": ns.database_username,
        "password": ns.database_password,
    }
    db = location.database.Connection(**credentials)

    # Every statement uses IF NOT EXISTS, so running this against an
    # already initialised database leaves existing objects alone.
    schema = """
        -- announcements
        CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
            first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
            last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
        CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
        CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));

        -- autnums
        CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
        CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

        -- networks
        CREATE TABLE IF NOT EXISTS networks(network inet, country text);
        CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
        CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

        -- overrides
        CREATE TABLE IF NOT EXISTS autnum_overrides(
            number bigint NOT NULL,
            name text,
            country text,
            is_anonymous_proxy boolean DEFAULT FALSE,
            is_satellite_provider boolean DEFAULT FALSE,
            is_anycast boolean DEFAULT FALSE
        );
        CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
            ON autnum_overrides(number);

        CREATE TABLE IF NOT EXISTS network_overrides(
            network inet NOT NULL,
            country text,
            is_anonymous_proxy boolean DEFAULT FALSE,
            is_satellite_provider boolean DEFAULT FALSE,
            is_anycast boolean DEFAULT FALSE
        );
        CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
            ON network_overrides(network);
    """

    # Create the whole schema in one transaction
    with db.transaction():
        db.execute(schema)

    return db
178
def handle_write(self, ns):
    """
    Export the contents of the SQL database into libloc database file(s).

    ns is the parsed command line namespace of the "write" sub-command:
    signing_key is handed to the writer, vendor/description/license are
    copied onto it when set, and file lists the output paths.
    """
    writer = location.Writer(ns.signing_key)

    # Copy over only the metadata that was actually given
    for field in ("vendor", "description", "license"):
        value = getattr(ns, field)
        if value:
            setattr(writer, field, value)

    log.info("Writing Autonomous Systems...")

    # All ASes that have a non-empty name; a name from the overrides
    # table is preferred over the one stored in autnums.
    autnums = self.db.query("""
        SELECT
            autnums.number AS number,
            COALESCE(
                (SELECT overrides.name FROM autnum_overrides overrides
                    WHERE overrides.number = autnums.number),
                autnums.name
            ) AS name
            FROM autnums
            WHERE name <> %s ORDER BY number
        """, "")

    for autnum in autnums:
        entry = writer.add_as(autnum.number)
        entry.name = autnum.name

    log.info("Writing networks...")

    # One row per announced network. Country and flags come from the most
    # specific network override first, then from the AS override; the
    # country finally falls back to the networks table.
    announcements = self.db.query("""
        SELECT
            DISTINCT ON (announcements.network)
            announcements.network AS network,
            announcements.autnum AS autnum,

            -- Country
            COALESCE(
                (
                    SELECT country FROM network_overrides overrides
                        WHERE announcements.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT country FROM autnum_overrides overrides
                        WHERE announcements.autnum = overrides.number
                ),
                networks.country
            ) AS country,

            -- Must be part of returned values for ORDER BY clause
            masklen(networks.network) AS sort,

            -- Flags
            COALESCE(
                (
                    SELECT is_anonymous_proxy FROM network_overrides overrides
                        WHERE announcements.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anonymous_proxy FROM autnum_overrides overrides
                        WHERE announcements.autnum = overrides.number
                )
            ) AS is_anonymous_proxy,
            COALESCE(
                (
                    SELECT is_satellite_provider FROM network_overrides overrides
                        WHERE announcements.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_satellite_provider FROM autnum_overrides overrides
                        WHERE announcements.autnum = overrides.number
                )
            ) AS is_satellite_provider,
            COALESCE(
                (
                    SELECT is_anycast FROM network_overrides overrides
                        WHERE announcements.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anycast FROM autnum_overrides overrides
                        WHERE announcements.autnum = overrides.number
                )
            ) AS is_anycast
        FROM announcements
            LEFT JOIN networks ON announcements.network <<= networks.network
        ORDER BY announcements.network, sort DESC
    """)

    for announcement in announcements:
        network = writer.add_network(announcement.network)

        # Originating AS and country
        network.asn = announcement.autnum
        network.country_code = announcement.country

        # Flags are only ever set, never cleared
        if announcement.is_anonymous_proxy:
            network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

        if announcement.is_satellite_provider:
            network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

        if announcement.is_anycast:
            network.set_flag(location.NETWORK_FLAG_ANYCAST)

    log.info("Writing database to file...")
    for path in ns.file:
        writer.write(path)
306
def handle_update_whois(self, ns):
    """
    Import AS names and network allocations from the configured sources.

    The WHOIS sources are parsed block by block into temporary tables and
    then merged into autnums in one transaction. Afterwards every
    extended source is parsed line by line, each in its own transaction.

    ns is the parsed command line namespace; it is not used here.
    """
    downloader = location.importer.Downloader()

    # Download all sources
    with self.db.transaction():
        # Temporary tables that hold the parsed data; they vanish when the
        # transaction commits. The AS number is a bigint like in the
        # autnums table, because 32 bit AS numbers do not fit into a
        # signed 32 bit integer column.
        self.db.execute("""
            CREATE TEMPORARY TABLE _autnums(number bigint, organization text)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

            CREATE TEMPORARY TABLE _organizations(handle text, name text)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
        """)

        for source in location.importer.WHOIS_SOURCES:
            with downloader.request(source, return_blocks=True) as f:
                for block in f:
                    self._parse_block(block)

        # Merge the result. autnums.name is declared NOT NULL, so ASes
        # whose organisation is unknown or unnamed are left out instead of
        # failing the whole import on a constraint violation.
        self.db.execute("""
            INSERT INTO autnums(number, name)
                SELECT _autnums.number, _organizations.name FROM _autnums
                    JOIN _organizations ON _autnums.organization = _organizations.handle
                WHERE _autnums.number IS NOT NULL
                    AND _organizations.name IS NOT NULL
            ON CONFLICT (number) DO UPDATE SET name = excluded.name;
        """)

    # Download all extended sources
    for source in location.importer.EXTENDED_SOURCES:
        with self.db.transaction():
            # Download data
            with downloader.request(source) as f:
                for line in f:
                    self._parse_line(line)
342
343 def _parse_block(self, block):
344 # Get first line to find out what type of block this is
345 line = block[0]
346
347 # aut-num
348 if line.startswith("aut-num:"):
349 return self._parse_autnum_block(block)
350
351 # organisation
352 elif line.startswith("organisation:"):
353 return self._parse_org_block(block)
354
355 def _parse_autnum_block(self, block):
356 autnum = {}
357 for line in block:
358 # Split line
359 key, val = split_line(line)
360
361 if key == "aut-num":
362 m = re.match(r"^(AS|as)(\d+)", val)
363 if m:
364 autnum["asn"] = m.group(2)
365
366 elif key == "org":
367 autnum[key] = val
368
369 # Skip empty objects
370 if not autnum:
371 return
372
373 # Insert into database
374 self.db.execute("INSERT INTO _autnums(number, organization) \
375 VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
376 organization = excluded.organization",
377 autnum.get("asn"), autnum.get("org"),
378 )
379
380 def _parse_org_block(self, block):
381 org = {}
382 for line in block:
383 # Split line
384 key, val = split_line(line)
385
386 if key in ("organisation", "org-name"):
387 org[key] = val
388
389 # Skip empty objects
390 if not org:
391 return
392
393 self.db.execute("INSERT INTO _organizations(handle, name) \
394 VALUES(%s, %s) ON CONFLICT (handle) DO \
395 UPDATE SET name = excluded.name",
396 org.get("organisation"), org.get("org-name"),
397 )
398
399 def _parse_line(self, line):
400 # Skip version line
401 if line.startswith("2"):
402 return
403
404 # Skip comments
405 if line.startswith("#"):
406 return
407
408 try:
409 registry, country_code, type, line = line.split("|", 3)
410 except:
411 log.warning("Could not parse line: %s" % line)
412 return
413
414 # Skip any lines that are for stats only
415 if country_code == "*":
416 return
417
418 if type in ("ipv6", "ipv4"):
419 return self._parse_ip_line(country_code, type, line)
420
421 def _parse_ip_line(self, country, type, line):
422 try:
423 address, prefix, date, status, organization = line.split("|")
424 except ValueError:
425 organization = None
426
427 # Try parsing the line without organization
428 try:
429 address, prefix, date, status = line.split("|")
430 except ValueError:
431 log.warning("Unhandled line format: %s" % line)
432 return
433
434 # Skip anything that isn't properly assigned
435 if not status in ("assigned", "allocated"):
436 return
437
438 # Cast prefix into an integer
439 try:
440 prefix = int(prefix)
441 except:
442 log.warning("Invalid prefix: %s" % prefix)
443 return
444
445 # Fix prefix length for IPv4
446 if type == "ipv4":
447 prefix = 32 - int(math.log(prefix, 2))
448
449 # Try to parse the address
450 try:
451 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
452 except ValueError:
453 log.warning("Invalid IP address: %s" % address)
454 return
455
456 self.db.execute("INSERT INTO networks(network, country) \
457 VALUES(%s, %s) ON CONFLICT (network) DO \
458 UPDATE SET country = excluded.country",
459 "%s" % network, country,
460 )
461
def handle_update_announcements(self, ns):
    """
    Fetch the BGP routing tables from a route server and store them.

    ns is the parsed command line namespace; ns.server[0] is the route
    server, which is contacted over telnet. Every route found in the IPv6
    and IPv4 unicast tables is upserted into the announcements table,
    and unwanted or stale announcements are purged afterwards.

    Returns 1 when no console prompt was received, otherwise None.
    """
    server = ns.server[0]

    # Pre-compile regular expression for routes. This is a raw literal so
    # that the backslashes reach the regex engine instead of being read as
    # (invalid) bytes escape sequences.
    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

    with telnetlib.Telnet(server) as t:
        # Enable debug mode
        #if ns.debug:
        #    t.set_debuglevel(10)

        # Wait for console greeting
        greeting = t.read_until(b"> ", timeout=30)
        if not greeting:
            log.error("Could not get a console prompt")
            return 1

        # Disable pagination
        t.write(b"terminal length 0\n")

        # Wait for the prompt to return
        t.read_until(b"> ")

        # Fetch the routing tables
        with self.db.transaction():
            for protocol in ("ipv6", "ipv4"):
                log.info("Requesting %s routing table" % protocol)

                # Request the full unicast routing table
                t.write(b"show bgp %s unicast\n" % protocol.encode())

                # Read entire header which ends with "Path"
                t.read_until(b"Path\r\n")

                while True:
                    # Try reading a full entry
                    # Those might be broken across multiple lines but ends with i
                    line = t.read_until(b"i\r\n", timeout=5)

                    # Nothing was read at all: the table is complete
                    if not line:
                        break

                    # Show line for debugging
                    #log.debug(repr(line))

                    # Try finding a route in here
                    m = route.match(line)
                    if m:
                        network, autnum = m.groups()

                        # Convert network to string
                        network = network.decode()

                        # Append /24 for IPv4 addresses
                        if not "/" in network and not ":" in network:
                            network = "%s/24" % network

                        # Convert AS number to integer
                        autnum = int(autnum)

                        log.info("Found announcement for %s by %s" % (network, autnum))

                        self.db.execute("INSERT INTO announcements(network, autnum) \
                            VALUES(%s, %s) ON CONFLICT (network) DO \
                            UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                            network, autnum,
                        )

                log.info("Finished reading the %s routing table" % protocol)

            # Purge anything we never want here
            self.db.execute("""
                -- Delete default routes
                DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

                -- Delete anything that is not global unicast address space
                DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

                -- DELETE "current network" address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

                -- DELETE local loopback address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

                -- DELETE RFC 1918 address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

                -- DELETE test, benchmark and documentation address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

                -- DELETE CGNAT address space (RFC 6598)
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

                -- DELETE link local address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

                -- DELETE IPv6 to IPv4 (6to4) address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';

                -- DELETE multicast and reserved address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

                -- Delete networks that are too small to be in the global routing table
                DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
                DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

                -- Delete any non-public or reserved ASNs
                DELETE FROM announcements WHERE NOT (
                    (autnum >= 1 AND autnum <= 23455)
                    OR
                    (autnum >= 23457 AND autnum <= 64495)
                    OR
                    (autnum >= 131072 AND autnum <= 4199999999)
                );

                -- Delete everything that we have not seen for 14 days
                DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
            """)
587
def handle_update_overrides(self, ns):
    """
    Replace all overrides in the database with the content of the given files.

    ns is the parsed command line namespace; ns.files lists the files to
    read. Both override tables are emptied first and refilled within the
    same transaction. "net" blocks go into network_overrides and
    "aut-num" blocks into autnum_overrides; blocks that are invalid or of
    any other type are logged and skipped.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("""
            TRUNCATE TABLE autnum_overrides;
            TRUNCATE TABLE network_overrides;
        """)

        for file in ns.files:
            log.info("Reading %s..." % file)

            with open(file, "rb") as f:
                for block_type, block in location.importer.read_blocks(f):
                    if block_type == "net":
                        network = block.get("net")

                        # Try to parse and normalise the network
                        try:
                            network = ipaddress.ip_network(network, strict=False)
                        except ValueError as e:
                            log.warning("Invalid IP network: %s: %s" % (network, e))
                            continue

                        # Prevent that we overwrite all networks
                        if network.prefixlen == 0:
                            log.warning("Skipping %s: You cannot overwrite default" % network)
                            continue

                        self.db.execute("""
                            INSERT INTO network_overrides(
                                network,
                                country,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast
                            ) VALUES (%s, %s, %s, %s, %s)
                            ON CONFLICT (network) DO NOTHING""",
                            "%s" % network,
                            block.get("country"),
                            block.get("is-anonymous-proxy") == "yes",
                            block.get("is-satellite-provider") == "yes",
                            block.get("is-anycast") == "yes",
                        )

                    elif block_type == "aut-num":
                        autnum = block.get("aut-num")

                        # The value must be "AS" followed by digits only.
                        # Anything else would either crash here or make
                        # the INSERT fail and abort the whole transaction.
                        number = None
                        if autnum and autnum.startswith("AS"):
                            number = autnum[2:]

                        if not number or not number.isdecimal():
                            log.warning("Invalid AS number: %s" % autnum)
                            continue

                        self.db.execute("""
                            INSERT INTO autnum_overrides(
                                number,
                                name,
                                country,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast
                            ) VALUES(%s, %s, %s, %s, %s, %s)
                            ON CONFLICT DO NOTHING""",
                            int(number),
                            block.get("name"),
                            block.get("country"),
                            block.get("is-anonymous-proxy") == "yes",
                            block.get("is-satellite-provider") == "yes",
                            block.get("is-anycast") == "yes",
                        )

                    else:
                        log.warning("Unsupported type: %s" % block_type)
662
663
def split_line(line):
    """
    Split a "key: value" line at its first colon.

    Returns the key and the value with surrounding whitespace removed.
    The value is an empty string when the line contains no colon.
    """
    head, _sep, tail = line.partition(":")

    return head.strip(), tail.strip()
672
def main():
    """Create the command line interface and hand control over to it."""
    CLI().run()

main()