]> git.ipfire.org Git - location/libloc.git/blob - src/python/location-importer.in
Add quiet mode
[location/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26 import telnetlib
27
28 # Load our location module
29 import location
30 import location.database
31 import location.importer
32 from location.i18n import _
33
# Initialise logging
# Module-wide logger that all importer commands below write to
log = logging.getLogger("location.importer")
# Pass records on to the handlers of the ancestor loggers
log.propagate = 1
37
38 class CLI(object):
def parse_cli(self):
    """
        Builds the argument parser, parses the command line and applies
        the requested log level.

        Returns the parsed argparse namespace. If no sub-command was
        selected, the usage is printed and the process exits with code 2.
    """
    parser = argparse.ArgumentParser(
        description=_("Location Importer Command Line Interface"),
    )
    subparsers = parser.add_subparsers()

    # Global configuration flags
    for flag, description in (
        ("--debug", _("Enable debug output")),
        ("--quiet", _("Enable quiet mode")),
    ):
        parser.add_argument(flag, action="store_true", help=description)

    # version
    parser.add_argument("--version", action="version",
        version="%(prog)s @VERSION@")

    # Database (all of these are mandatory)
    for flag, description, metavar in (
        ("--database-host", _("Database Hostname"), _("HOST")),
        ("--database-name", _("Database Name"), _("NAME")),
        ("--database-username", _("Database Username"), _("USERNAME")),
        ("--database-password", _("Database Password"), _("PASSWORD")),
    ):
        parser.add_argument(flag, required=True, help=description,
            metavar=metavar)

    # Write Database
    write = subparsers.add_parser("write", help=_("Write database to file"))
    write.set_defaults(func=self.handle_write)
    write.add_argument("file", nargs=1, help=_("Database File"))
    write.add_argument("--signing-key", nargs="?", type=open,
        help=_("Signing Key"))
    for flag, description in (
        ("--vendor", _("Sets the vendor")),
        ("--description", _("Sets a description")),
        ("--license", _("Sets the license")),
    ):
        write.add_argument(flag, nargs="?", help=description)

    # Update WHOIS
    update_whois = subparsers.add_parser("update-whois",
        help=_("Update WHOIS Information"))
    update_whois.set_defaults(func=self.handle_update_whois)

    # Update announcements
    update_announcements = subparsers.add_parser("update-announcements",
        help=_("Update BGP Annoucements"))
    update_announcements.set_defaults(func=self.handle_update_announcements)
    update_announcements.add_argument("server", nargs=1,
        help=_("Route Server to connect to"), metavar=_("SERVER"))

    # Update overrides
    update_overrides = subparsers.add_parser("update-overrides",
        help=_("Update overrides"))
    update_overrides.add_argument("files", nargs="+",
        help=_("Files to import"))
    update_overrides.set_defaults(func=self.handle_update_overrides)

    args = parser.parse_args()

    # Configure logging (--debug wins over --quiet)
    level = None
    if args.debug:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.WARNING

    if level is not None:
        location.logger.set_level(level)

    # Print usage if no action was given
    if "func" not in args:
        parser.print_usage()
        sys.exit(2)

    return args
108
def run(self):
    """
        Entry point of the command line interface.

        Parses the arguments, connects to the database (stored as
        self.db), runs the selected handler and always terminates the
        process through sys.exit().
    """
    # Parse command line arguments
    args = self.parse_cli()

    # Initialise database
    self.db = self._setup_database(args)

    # Call the handler; a falsy result (None, 0) becomes exit code 0
    sys.exit(args.func(args) or 0)
125
def _setup_database(self, ns):
    """
        Connects to the database described by the --database-* arguments
        in ns, creates all tables and indexes that do not exist yet and
        returns the connection.
    """
    schema = """
        -- announcements
        CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
            first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
            last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
        CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
        CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));

        -- autnums
        CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
        CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

        -- networks
        CREATE TABLE IF NOT EXISTS networks(network inet, country text);
        CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
        CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

        -- overrides
        CREATE TABLE IF NOT EXISTS autnum_overrides(
            number bigint NOT NULL,
            name text,
            is_anonymous_proxy boolean DEFAULT FALSE,
            is_satellite_provider boolean DEFAULT FALSE,
            is_anycast boolean DEFAULT FALSE
        );
        CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
            ON autnum_overrides(number);

        CREATE TABLE IF NOT EXISTS network_overrides(
            network inet NOT NULL,
            country text,
            is_anonymous_proxy boolean DEFAULT FALSE,
            is_satellite_provider boolean DEFAULT FALSE,
            is_anycast boolean DEFAULT FALSE
        );
        CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
            ON network_overrides(network);
    """

    # Connect to database
    credentials = {
        "host": ns.database_host,
        "database": ns.database_name,
        "user": ns.database_username,
        "password": ns.database_password,
    }
    connection = location.database.Connection(**credentials)

    # Create the whole schema in one transaction
    with connection.transaction():
        connection.execute(schema)

    return connection
177
def handle_write(self, ns):
    """
        Compiles a database in libloc format out of what is in the database

        Reads ns.signing_key, ns.vendor, ns.description, ns.license and
        ns.file. All Autonomous Systems with a non-empty name and all
        announced networks (with their country and override flags) are
        added to a writer which is then written to every path in ns.file.
    """
    # NOTE: The parsed arguments are deliberately not printed here as
    # they contain the database password.

    # Allocate a writer
    writer = location.Writer(ns.signing_key)

    # Set all metadata
    if ns.vendor:
        writer.vendor = ns.vendor

    if ns.description:
        writer.description = ns.description

    if ns.license:
        writer.license = ns.license

    # Add all Autonomous Systems
    log.info("Writing Autonomous Systems...")

    # Select all ASes with a name (an override name takes precedence)
    rows = self.db.query("""
        SELECT
            autnums.number AS number,
            COALESCE(
                (SELECT overrides.name FROM autnum_overrides overrides
                    WHERE overrides.number = autnums.number),
                autnums.name
            ) AS name
            FROM autnums
            WHERE name <> %s ORDER BY number
        """, "")

    for row in rows:
        a = writer.add_as(row.number)
        a.name = row.name

    # Add all networks
    log.info("Writing networks...")

    # Select all known networks. For every flag the most specific
    # network override wins, then the override of the announcing AS.
    rows = self.db.query("""
        SELECT
            DISTINCT ON (announcements.network)
            announcements.network AS network,
            announcements.autnum AS autnum,
            networks.country AS country,

            -- Must be part of returned values for ORDER BY clause
            masklen(networks.network) AS sort,

            -- Flags
            COALESCE(
                (
                    SELECT is_anonymous_proxy FROM network_overrides overrides
                        WHERE announcements.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anonymous_proxy FROM autnum_overrides overrides
                        WHERE announcements.autnum = overrides.number
                )
            ) AS is_anonymous_proxy,
            COALESCE(
                (
                    SELECT is_satellite_provider FROM network_overrides overrides
                        WHERE announcements.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_satellite_provider FROM autnum_overrides overrides
                        WHERE announcements.autnum = overrides.number
                )
            ) AS is_satellite_provider,
            COALESCE(
                (
                    SELECT is_anycast FROM network_overrides overrides
                        WHERE announcements.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anycast FROM autnum_overrides overrides
                        WHERE announcements.autnum = overrides.number
                )
            ) AS is_anycast
        FROM announcements
            LEFT JOIN networks ON announcements.network <<= networks.network
        ORDER BY announcements.network, sort DESC
    """)

    for row in rows:
        network = writer.add_network(row.network)

        # Save AS & country
        network.asn, network.country_code = row.autnum, row.country

        # Set flags
        if row.is_anonymous_proxy:
            network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

        if row.is_satellite_provider:
            network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

        if row.is_anycast:
            network.set_flag(location.NETWORK_FLAG_ANYCAST)

    # Write everything to file
    log.info("Writing database to file...")
    for file in ns.file:
        writer.write(file)
293
def handle_update_whois(self, ns):
    """
        Downloads all WHOIS sources and extended sources and imports
        them into the database.

        WHOIS blocks are first collected in temporary tables and merged
        into the autnums table in the same transaction. Each extended
        source is imported in a transaction of its own.
    """
    downloader = location.importer.Downloader()

    # Download all sources
    with self.db.transaction():
        # Create some temporary tables to store parsed data
        # (dropped automatically when this transaction commits)
        self.db.execute("""
            CREATE TEMPORARY TABLE _autnums(number integer, organization text)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

            CREATE TEMPORARY TABLE _organizations(handle text, name text)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
        """)

        for source in location.importer.WHOIS_SOURCES:
            with downloader.request(source, return_blocks=True) as f:
                for block in f:
                    self._parse_block(block)

        # autnums.name is NOT NULL, so only ASes with a known
        # organisation name can be merged. A LEFT JOIN would produce
        # NULL names and make the whole INSERT fail.
        self.db.execute("""
            INSERT INTO autnums(number, name)
                SELECT _autnums.number, _organizations.name FROM _autnums
                    JOIN _organizations ON _autnums.organization = _organizations.handle
                WHERE _organizations.name IS NOT NULL
            ON CONFLICT (number) DO UPDATE SET name = excluded.name;
        """)

    # Download all extended sources
    for source in location.importer.EXTENDED_SOURCES:
        with self.db.transaction():
            # Download data
            with downloader.request(source) as f:
                for line in f:
                    self._parse_line(line)
329
def _parse_block(self, block):
    """
        Dispatches a WHOIS block to the parser for its object type.

        The type is taken from the first line of the block. Blocks of any
        other type are ignored and None is returned.
    """
    first_line = block[0]

    parsers = (
        # aut-num
        ("aut-num:", self._parse_autnum_block),
        # organisation
        ("organisation:", self._parse_org_block),
    )

    for prefix, parser in parsers:
        if first_line.startswith(prefix):
            return parser(block)
341
def _parse_autnum_block(self, block):
    """
        Parses an aut-num block and stores the AS number together with
        its organisation handle (if any) in the temporary _autnums table.

        Blocks without a parsable AS number are skipped.
    """
    autnum = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "aut-num":
            m = re.match(r"^(AS|as)(\d+)", val)
            if m:
                autnum["asn"] = m.group(2)

        elif key == "org":
            autnum[key] = val

    # Skip empty objects and objects without an AS number. Without this
    # check an "org" line alone would insert a row with a NULL number.
    if "asn" not in autnum:
        return

    # Insert into database
    self.db.execute("INSERT INTO _autnums(number, organization) \
        VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
        organization = excluded.organization",
        autnum.get("asn"), autnum.get("org"),
    )
366
def _parse_org_block(self, block):
    """
        Parses an organisation block and stores the handle and the name
        of the organisation in the temporary _organizations table.
    """
    wanted = ("organisation", "org-name")

    # Keep only the keys we are interested in (last occurrence wins)
    org = {
        key: val for key, val in map(split_line, block) if key in wanted
    }

    # Skip empty objects
    if not org:
        return

    self.db.execute("INSERT INTO _organizations(handle, name) \
        VALUES(%s, %s) ON CONFLICT (handle) DO \
        UPDATE SET name = excluded.name",
        org.get("organisation"), org.get("org-name"),
    )
385
def _parse_line(self, line):
    """
        Parses one line of an extended source (pipe-separated fields:
        registry, country code, type, then type-specific fields).

        Version lines, comments, statistics lines and all record types
        other than "ipv4" and "ipv6" are ignored. IP records are handed
        over to _parse_ip_line() and its result is returned.
    """
    # Skip version line
    if line.startswith("2"):
        return

    # Skip comments
    if line.startswith("#"):
        return

    # Only a wrong number of fields can fail here, so do not swallow
    # anything else (e.g. KeyboardInterrupt)
    try:
        registry, country_code, record_type, rest = line.split("|", 3)
    except ValueError:
        log.warning("Could not parse line: %s", line)
        return

    # Skip any lines that are for stats only
    if country_code == "*":
        return

    if record_type in ("ipv6", "ipv4"):
        return self._parse_ip_line(country_code, record_type, rest)
407
def _parse_ip_line(self, country, type, line):
    """
        Parses the type-specific part of an "ipv4" or "ipv6" record and
        stores the network with its country in the networks table.

        line holds the pipe-separated fields address, prefix, date and
        status, optionally followed by an organization field. For "ipv4"
        the prefix field is a number of addresses and is converted into a
        prefix length; for "ipv6" it is used as the prefix length.

        Records that are neither "assigned" nor "allocated" and records
        that cannot be parsed are skipped.
    """
    fields = line.split("|")

    # The trailing organization field is optional
    if len(fields) not in (4, 5):
        log.warning("Unhandled line format: %s", line)
        return

    address, prefix, date, status = fields[:4]

    # Skip anything that isn't properly assigned
    if status not in ("assigned", "allocated"):
        return

    # Cast prefix into an integer
    try:
        prefix = int(prefix)
    except ValueError:
        log.warning("Invalid prefix: %s", prefix)
        return

    # Fix prefix length for IPv4
    if type == "ipv4":
        # There is no logarithm of zero or of negative numbers
        if prefix < 1:
            log.warning("Invalid prefix: %s", prefix)
            return

        # bit_length() - 1 is floor(log2(n)) without any floating point
        # rounding errors
        prefix = 32 - (prefix.bit_length() - 1)

    # Try to parse the address
    try:
        network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
    except ValueError:
        log.warning("Invalid IP address: %s", address)
        return

    self.db.execute("INSERT INTO networks(network, country) \
        VALUES(%s, %s) ON CONFLICT (network) DO \
        UPDATE SET country = excluded.country",
        "%s" % network, country,
    )
448
def handle_update_announcements(self, ns):
    """
        Fetches the IPv6 and IPv4 unicast BGP tables from the route
        server ns.server[0] over telnet and stores every route with its
        origin AS in the announcements table.

        Afterwards everything that must never show up (default routes,
        non-global address space, too specific networks, reserved AS
        numbers) and everything that has not been seen for 14 days is
        removed again.

        NOTE: telnetlib has been removed from the standard library in
        Python 3.13.
    """
    server = ns.server[0]

    # Pre-compile regular expression for routes
    # (raw literal, so that the backslashes reach the regex engine
    # without triggering invalid escape sequence warnings)
    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

    with telnetlib.Telnet(server) as t:
        # Wait for console greeting
        greeting = t.read_until(b"> ")
        log.debug(greeting.decode())

        # Disable pagination
        t.write(b"terminal length 0\n")

        # Wait for the prompt to return
        t.read_until(b"> ")

        # Fetch the routing tables
        with self.db.transaction():
            for protocol in ("ipv6", "ipv4"):
                log.info("Requesting %s routing table" % protocol)

                # Request the full unicast routing table
                t.write(b"show bgp %s unicast\n" % protocol.encode())

                # Read entire header which ends with "Path"
                t.read_until(b"Path\r\n")

                while True:
                    # Try reading a full entry
                    # Those might be broken across multiple lines but ends with i
                    line = t.read_until(b"i\r\n", timeout=5)

                    # Nothing arrived before the timeout: table is complete
                    if not line:
                        break

                    # Try finding a route in here
                    m = route.match(line)
                    if not m:
                        continue

                    network, autnum = m.groups()

                    # Convert network to string
                    network = network.decode()

                    # IPv4 routes are printed without a prefix length when
                    # it equals the classful (natural) mask of the
                    # address, so derive it from the first octet
                    if "/" not in network and ":" not in network:
                        try:
                            first_octet = int(network.partition(".")[0])
                        except ValueError:
                            log.warning("Could not parse network: %s", network)
                            continue

                        if first_octet < 128:
                            masklen = 8
                        elif first_octet < 192:
                            masklen = 16
                        else:
                            masklen = 24

                        network = "%s/%s" % (network, masklen)

                    # Convert AS number to integer
                    autnum = int(autnum)

                    log.info("Found announcement for %s by %s" % (network, autnum))

                    self.db.execute("INSERT INTO announcements(network, autnum) \
                        VALUES(%s, %s) ON CONFLICT (network) DO \
                        UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                        network, autnum,
                    )

                log.info("Finished reading the %s routing table" % protocol)

            # Purge anything we never want here
            self.db.execute("""
                -- Delete default routes
                DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

                -- Delete anything that is not global unicast address space
                DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

                -- DELETE "current network" address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

                -- DELETE local loopback address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

                -- DELETE RFC 1918 address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

                -- DELETE test, benchmark and documentation address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

                -- DELETE CGNAT address space (RFC 6598)
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

                -- DELETE link local address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

                -- DELETE IPv6 to IPv4 (6to4) address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';

                -- DELETE multicast and reserved address space
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
                DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

                -- Delete networks that are too small to be in the global routing table
                DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
                DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

                -- Delete any non-public or reserved ASNs
                DELETE FROM announcements WHERE NOT (
                    (autnum >= 1 AND autnum <= 23455)
                    OR
                    (autnum >= 23457 AND autnum <= 64495)
                    OR
                    (autnum >= 131072 AND autnum <= 4199999999)
                );

                -- Delete everything that we have not seen for 14 days
                DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
            """)
572
def handle_update_overrides(self, ns):
    """
        Replaces all network and AS overrides by the blocks read from
        the files listed in ns.files.

        Both override tables are emptied first; everything happens in a
        single transaction.
    """
    flag_keys = ("is-anonymous-proxy", "is-satellite-provider", "is-anycast")

    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("""
            TRUNCATE TABLE autnum_overrides;
            TRUNCATE TABLE network_overrides;
        """)

        for path in ns.files:
            log.info("Reading %s..." % path)

            with open(path, "rb") as handle:
                for kind, block in location.importer.read_blocks(handle):
                    if kind == "net":
                        network = block.get("net")

                        # Try to parse and normalise the network
                        try:
                            network = ipaddress.ip_network(network, strict=False)
                        except ValueError as e:
                            log.warning("Invalid IP network: %s: %s" % (network, e))
                            continue

                        # A flag is set only if its value is exactly "yes"
                        flags = [block.get(key) == "yes" for key in flag_keys]

                        self.db.execute("""
                            INSERT INTO network_overrides(
                                network,
                                country,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast
                            ) VALUES (%s, %s, %s, %s, %s)
                            ON CONFLICT (network) DO NOTHING""",
                            "%s" % network,
                            block.get("country"),
                            *flags,
                        )

                    elif kind == "autnum":
                        autnum = block.get("autnum")

                        # Check if AS number begins with "AS"
                        if not autnum.startswith("AS"):
                            log.warning("Invalid AS number: %s" % autnum)
                            continue

                        # Strip "AS"
                        number = autnum[2:]

                        # A flag is set only if its value is exactly "yes"
                        flags = [block.get(key) == "yes" for key in flag_keys]

                        self.db.execute("""
                            INSERT INTO autnum_overrides(
                                number,
                                name,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast
                            ) VALUES(%s, %s, %s, %s, %s)
                            ON CONFLICT DO NOTHING""",
                            number, block.get("name"),
                            *flags,
                        )

                    else:
                        log.warning("Unsupport type: %s" % kind)
639
640
def split_line(line):
    """
        Splits a "key: value" line at the first colon and returns the
        (key, value) tuple with surrounding whitespace removed.

        The value is an empty string if the line has no colon.
    """
    head, _separator, tail = line.partition(":")

    # Strip any excess space
    return head.strip(), tail.strip()
649
def main():
    """
        Runs the command line interface
    """
    c = CLI()
    c.run()

# Only run the importer when executed as a script, so that importing
# this file does not parse arguments or call sys.exit()
if __name__ == "__main__":
    main()