]> git.ipfire.org Git - location/libloc.git/blame - src/python/location-importer.in
location-import: Abort when we could not parse the prefix
[location/libloc.git] / src / python / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
6# Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
6ffd06b5 21import ipaddress
78ff0cf2 22import logging
6ffd06b5
MT
23import math
24import re
78ff0cf2 25import sys
83d61c46 26import telnetlib
78ff0cf2
MT
27
28# Load our location module
29import location
29c6fa22 30import location.database
3192b66c 31import location.importer
78ff0cf2
MT
32from location.i18n import _
33
# Initialise logging
# Module-wide logger used by every sub-command of the importer
log = logging.getLogger("location.importer")
# Keep propagation switched on (1 is truthy) so that records are also handed
# to the handlers of the ancestor loggers
log.propagate = 1
37
38class CLI(object):
39 def parse_cli(self):
40 parser = argparse.ArgumentParser(
41 description=_("Location Importer Command Line Interface"),
42 )
6ffd06b5 43 subparsers = parser.add_subparsers()
78ff0cf2
MT
44
45 # Global configuration flags
46 parser.add_argument("--debug", action="store_true",
47 help=_("Enable debug output"))
48
49 # version
50 parser.add_argument("--version", action="version",
51 version="%(prog)s @VERSION@")
52
29c6fa22
MT
53 # Database
54 parser.add_argument("--database-host", required=True,
55 help=_("Database Hostname"), metavar=_("HOST"))
56 parser.add_argument("--database-name", required=True,
57 help=_("Database Name"), metavar=_("NAME"))
58 parser.add_argument("--database-username", required=True,
59 help=_("Database Username"), metavar=_("USERNAME"))
60 parser.add_argument("--database-password", required=True,
61 help=_("Database Password"), metavar=_("PASSWORD"))
62
0983f3dd
MT
63 # Write Database
64 write = subparsers.add_parser("write", help=_("Write database to file"))
65 write.set_defaults(func=self.handle_write)
66 write.add_argument("file", nargs=1, help=_("Database File"))
67 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
68 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
69 write.add_argument("--description", nargs="?", help=_("Sets a description"))
70 write.add_argument("--license", nargs="?", help=_("Sets the license"))
71
6ffd06b5
MT
72 # Update WHOIS
73 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
74 update_whois.set_defaults(func=self.handle_update_whois)
75
83d61c46
MT
76 # Update announcements
77 update_announcements = subparsers.add_parser("update-announcements",
78 help=_("Update BGP Annoucements"))
79 update_announcements.set_defaults(func=self.handle_update_announcements)
80 update_announcements.add_argument("server", nargs=1,
81 help=_("Route Server to connect to"), metavar=_("SERVER"))
82
d7fc3057
MT
83 # Update overrides
84 update_overrides = subparsers.add_parser("update-overrides",
85 help=_("Update overrides"),
86 )
87 update_overrides.add_argument(
88 "files", nargs="+", help=_("Files to import"),
89 )
90 update_overrides.set_defaults(func=self.handle_update_overrides)
91
78ff0cf2
MT
92 args = parser.parse_args()
93
94 # Enable debug logging
95 if args.debug:
96 log.setLevel(logging.DEBUG)
97
6ffd06b5
MT
98 # Print usage if no action was given
99 if not "func" in args:
100 parser.print_usage()
101 sys.exit(2)
102
78ff0cf2
MT
103 return args
104
105 def run(self):
106 # Parse command line arguments
107 args = self.parse_cli()
108
29c6fa22 109 # Initialise database
6ffd06b5 110 self.db = self._setup_database(args)
29c6fa22 111
78ff0cf2 112 # Call function
6ffd06b5 113 ret = args.func(args)
78ff0cf2
MT
114
115 # Return with exit code
116 if ret:
117 sys.exit(ret)
118
119 # Otherwise just exit
120 sys.exit(0)
121
29c6fa22
MT
122 def _setup_database(self, ns):
123 """
124 Initialise the database
125 """
126 # Connect to database
127 db = location.database.Connection(
128 host=ns.database_host, database=ns.database_name,
129 user=ns.database_username, password=ns.database_password,
130 )
131
132 with db.transaction():
133 db.execute("""
83d61c46
MT
134 -- announcements
135 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
136 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
137 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
138 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
139 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
140
6ffd06b5 141 -- autnums
0983f3dd 142 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
6ffd06b5
MT
143 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
144
429a43d1 145 -- networks
83d61c46 146 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
429a43d1 147 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
83d61c46 148 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
d7fc3057
MT
149
150 -- overrides
151 CREATE TABLE IF NOT EXISTS autnum_overrides(
152 number bigint NOT NULL,
153 name text,
154 is_anonymous_proxy boolean DEFAULT FALSE,
155 is_satellite_provider boolean DEFAULT FALSE,
156 is_anycast boolean DEFAULT FALSE
157 );
158 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
159 ON autnum_overrides(number);
160
161 CREATE TABLE IF NOT EXISTS network_overrides(
162 network inet NOT NULL,
163 country text,
164 is_anonymous_proxy boolean DEFAULT FALSE,
165 is_satellite_provider boolean DEFAULT FALSE,
166 is_anycast boolean DEFAULT FALSE
167 );
168 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
169 ON network_overrides(network);
29c6fa22
MT
170 """)
171
172 return db
173
0983f3dd
MT
174 def handle_write(self, ns):
175 """
176 Compiles a database in libloc format out of what is in the database
177 """
178 print(ns)
179
180 # Allocate a writer
181 writer = location.Writer(ns.signing_key)
182
183 # Set all metadata
184 if ns.vendor:
185 writer.vendor = ns.vendor
186
187 if ns.description:
188 writer.description = ns.description
189
190 if ns.license:
191 writer.license = ns.license
192
193 # Add all Autonomous Systems
194 log.info("Writing Autonomous Systems...")
195
196 # Select all ASes with a name
6e97c44b
MT
197 rows = self.db.query("""
198 SELECT
199 autnums.number AS number,
200 COALESCE(
201 (SELECT overrides.name FROM autnum_overrides overrides
202 WHERE overrides.number = autnums.number),
203 autnums.name
204 ) AS name
205 FROM autnums
206 WHERE name <> %s ORDER BY number
207 """, "")
0983f3dd
MT
208
209 for row in rows:
210 a = writer.add_as(row.number)
211 a.name = row.name
212
213 # Add all networks
214 log.info("Writing networks...")
215
216 # Select all known networks
217 rows = self.db.query("""
218 SELECT
219 announcements.network AS network,
220 announcements.autnum AS autnum,
221 (
222 SELECT networks.country FROM networks
223 WHERE announcements.network <<= networks.network
b6fdae1e
MT
224 ORDER BY masklen(networks.network) DESC
225 LIMIT 1
0983f3dd
MT
226 ) AS country,
227
228 -- Flags
1422b5d4
MT
229 COALESCE(
230 (
231 SELECT is_anonymous_proxy FROM network_overrides overrides
232 WHERE announcements.network <<= overrides.network
233 ORDER BY masklen(overrides.network) DESC
234 LIMIT 1
235 ),
236 (
237 SELECT is_anonymous_proxy FROM autnum_overrides overrides
238 WHERE announcements.autnum = overrides.number
239 )
240 ) AS is_anonymous_proxy,
241 COALESCE(
242 (
243 SELECT is_satellite_provider FROM network_overrides overrides
244 WHERE announcements.network <<= overrides.network
245 ORDER BY masklen(overrides.network) DESC
246 LIMIT 1
247 ),
248 (
249 SELECT is_satellite_provider FROM autnum_overrides overrides
250 WHERE announcements.autnum = overrides.number
251 )
252 ) AS is_satellite_provider,
253 COALESCE(
254 (
255 SELECT is_anycast FROM network_overrides overrides
256 WHERE announcements.network <<= overrides.network
257 ORDER BY masklen(overrides.network) DESC
258 LIMIT 1
259 ),
260 (
261 SELECT is_anycast FROM autnum_overrides overrides
262 WHERE announcements.autnum = overrides.number
263 )
264 ) AS is_anycast
0983f3dd
MT
265 FROM announcements
266 """)
267
268 for row in rows:
269 network = writer.add_network(row.network)
270
271 # Save AS & country
272 network.asn, network.country_code = row.autnum, row.country
273
274 # Set flags
275 if row.is_anonymous_proxy:
276 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
277
278 if row.is_satellite_provider:
279 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
280
281 if row.is_anycast:
282 network.set_flag(location.NETWORK_FLAG_ANYCAST)
283
284 # Write everything to file
285 log.info("Writing database to file...")
286 for file in ns.file:
287 writer.write(file)
288
6ffd06b5
MT
289 def handle_update_whois(self, ns):
290 downloader = location.importer.Downloader()
291
292 # Download all sources
0365119d
MT
293 with self.db.transaction():
294 # Create some temporary tables to store parsed data
295 self.db.execute("""
296 CREATE TEMPORARY TABLE _autnums(number integer, organization text)
297 ON COMMIT DROP;
298 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
299
300 CREATE TEMPORARY TABLE _organizations(handle text, name text)
301 ON COMMIT DROP;
302 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
303 """)
304
305 for source in location.importer.WHOIS_SOURCES:
6ffd06b5
MT
306 with downloader.request(source, return_blocks=True) as f:
307 for block in f:
308 self._parse_block(block)
309
0365119d
MT
310 self.db.execute("""
311 INSERT INTO autnums(number, name)
312 SELECT _autnums.number, _organizations.name FROM _autnums
313 LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
314 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
315 """)
316
429a43d1
MT
317 # Download all extended sources
318 for source in location.importer.EXTENDED_SOURCES:
319 with self.db.transaction():
429a43d1
MT
320 # Download data
321 with downloader.request(source) as f:
322 for line in f:
323 self._parse_line(line)
324
6ffd06b5
MT
325 def _parse_block(self, block):
326 # Get first line to find out what type of block this is
327 line = block[0]
328
6ffd06b5 329 # aut-num
429a43d1 330 if line.startswith("aut-num:"):
6ffd06b5
MT
331 return self._parse_autnum_block(block)
332
333 # organisation
334 elif line.startswith("organisation:"):
335 return self._parse_org_block(block)
336
6ffd06b5 337 def _parse_autnum_block(self, block):
6ffd06b5
MT
338 autnum = {}
339 for line in block:
340 # Split line
341 key, val = split_line(line)
342
343 if key == "aut-num":
344 m = re.match(r"^(AS|as)(\d+)", val)
345 if m:
346 autnum["asn"] = m.group(2)
347
0365119d 348 elif key == "org":
6ffd06b5
MT
349 autnum[key] = val
350
351 # Skip empty objects
352 if not autnum:
353 return
354
355 # Insert into database
0365119d
MT
356 self.db.execute("INSERT INTO _autnums(number, organization) \
357 VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
358 organization = excluded.organization",
359 autnum.get("asn"), autnum.get("org"),
6ffd06b5
MT
360 )
361
6ffd06b5
MT
362 def _parse_org_block(self, block):
363 org = {}
364 for line in block:
365 # Split line
366 key, val = split_line(line)
367
0365119d 368 if key in ("organisation", "org-name"):
6ffd06b5
MT
369 org[key] = val
370
371 # Skip empty objects
372 if not org:
373 return
374
0365119d
MT
375 self.db.execute("INSERT INTO _organizations(handle, name) \
376 VALUES(%s, %s) ON CONFLICT (handle) DO \
377 UPDATE SET name = excluded.name",
378 org.get("organisation"), org.get("org-name"),
6ffd06b5
MT
379 )
380
429a43d1
MT
381 def _parse_line(self, line):
382 # Skip version line
383 if line.startswith("2"):
384 return
6ffd06b5 385
429a43d1
MT
386 # Skip comments
387 if line.startswith("#"):
388 return
6ffd06b5 389
429a43d1
MT
390 try:
391 registry, country_code, type, line = line.split("|", 3)
392 except:
393 log.warning("Could not parse line: %s" % line)
394 return
6ffd06b5 395
429a43d1
MT
396 # Skip any lines that are for stats only
397 if country_code == "*":
6ffd06b5
MT
398 return
399
429a43d1
MT
400 if type in ("ipv6", "ipv4"):
401 return self._parse_ip_line(country_code, type, line)
402
429a43d1
MT
403 def _parse_ip_line(self, country, type, line):
404 try:
405 address, prefix, date, status, organization = line.split("|")
406 except ValueError:
407 organization = None
408
409 # Try parsing the line without organization
410 try:
411 address, prefix, date, status = line.split("|")
412 except ValueError:
413 log.warning("Unhandled line format: %s" % line)
414 return
415
416 # Skip anything that isn't properly assigned
417 if not status in ("assigned", "allocated"):
418 return
419
420 # Cast prefix into an integer
421 try:
422 prefix = int(prefix)
423 except:
424 log.warning("Invalid prefix: %s" % prefix)
7177031f 425 return
429a43d1
MT
426
427 # Fix prefix length for IPv4
428 if type == "ipv4":
429 prefix = 32 - int(math.log(prefix, 2))
430
431 # Try to parse the address
432 try:
433 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
434 except ValueError:
435 log.warning("Invalid IP address: %s" % address)
436 return
437
87b3e102
MT
438 self.db.execute("INSERT INTO networks(network, country) \
439 VALUES(%s, %s) ON CONFLICT (network) DO \
440 UPDATE SET country = excluded.country",
441 "%s" % network, country,
6ffd06b5
MT
442 )
443
83d61c46
MT
444 def handle_update_announcements(self, ns):
445 server = ns.server[0]
446
447 # Pre-compile regular expression for routes
448 #route = re.compile(b"^\*>?\s[\si]?([^\s]+)[.\s]*?(\d+)\si$", re.MULTILINE)
449 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
450
451 with telnetlib.Telnet(server) as t:
452 # Enable debug mode
453 #if ns.debug:
454 # t.set_debuglevel(10)
455
456 # Wait for console greeting
457 greeting = t.read_until(b"> ")
458 log.debug(greeting.decode())
459
460 # Disable pagination
461 t.write(b"terminal length 0\n")
462
463 # Wait for the prompt to return
464 t.read_until(b"> ")
465
466 # Fetch the routing tables
467 with self.db.transaction():
468 for protocol in ("ipv6", "ipv4"):
469 log.info("Requesting %s routing table" % protocol)
470
471 # Request the full unicast routing table
472 t.write(b"show bgp %s unicast\n" % protocol.encode())
473
474 # Read entire header which ends with "Path"
475 t.read_until(b"Path\r\n")
476
477 while True:
478 # Try reading a full entry
479 # Those might be broken across multiple lines but ends with i
480 line = t.read_until(b"i\r\n", timeout=5)
481 if not line:
482 break
483
484 # Show line for debugging
485 #log.debug(repr(line))
486
487 # Try finding a route in here
488 m = route.match(line)
489 if m:
490 network, autnum = m.groups()
491
492 # Convert network to string
493 network = network.decode()
494
d773c1bc
MT
495 # Append /24 for IPv4 addresses
496 if not "/" in network and not ":" in network:
497 network = "%s/24" % network
498
83d61c46
MT
499 # Convert AS number to integer
500 autnum = int(autnum)
501
502 log.info("Found announcement for %s by %s" % (network, autnum))
503
504 self.db.execute("INSERT INTO announcements(network, autnum) \
505 VALUES(%s, %s) ON CONFLICT (network) DO \
506 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
507 network, autnum,
508 )
509
510 log.info("Finished reading the %s routing table" % protocol)
511
512 # Purge anything we never want here
513 self.db.execute("""
514 -- Delete default routes
515 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
516
517 -- Delete anything that is not global unicast address space
518 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
519
1d4e4e8f
PM
520 -- DELETE "current network" address space
521 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
522
cedee656
PM
523 -- DELETE local loopback address space
524 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
525
1d4e4e8f 526 -- DELETE RFC 1918 address space
83d61c46
MT
527 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
528 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
529 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
530
209c04b6
PM
531 -- DELETE test, benchmark and documentation address space
532 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
533 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
534 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
535 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
536 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
537
538 -- DELETE CGNAT address space (RFC 6598)
539 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
540
541 -- DELETE link local address space
542 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
543
544 -- DELETE IPv6 to IPv4 (6to4) address space
545 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
546
b89cee80
PM
547 -- DELETE multicast and reserved address space
548 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
549 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
550
83d61c46
MT
551 -- Delete networks that are too small to be in the global routing table
552 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
553 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
554
555 -- Delete any non-public or reserved ASNs
556 DELETE FROM announcements WHERE NOT (
557 (autnum >= 1 AND autnum <= 23455)
558 OR
559 (autnum >= 23457 AND autnum <= 64495)
560 OR
561 (autnum >= 131072 AND autnum <= 4199999999)
562 );
563
564 -- Delete everything that we have not seen for 14 days
565 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
566 """)
567
d7fc3057
MT
568 def handle_update_overrides(self, ns):
569 with self.db.transaction():
570 # Drop all data that we have
571 self.db.execute("""
572 TRUNCATE TABLE autnum_overrides;
573 TRUNCATE TABLE network_overrides;
574 """)
575
576 for file in ns.files:
577 log.info("Reading %s..." % file)
578
579 with open(file, "rb") as f:
580 for type, block in location.importer.read_blocks(f):
581 if type == "net":
582 network = block.get("net")
583 # Try to parse and normalise the network
584 try:
585 network = ipaddress.ip_network(network, strict=False)
586 except ValueError as e:
587 log.warning("Invalid IP network: %s: %s" % (network, e))
588 continue
589
590 self.db.execute("""
591 INSERT INTO network_overrides(
592 network,
593 country,
594 is_anonymous_proxy,
595 is_satellite_provider,
596 is_anycast
597 ) VALUES (%s, %s, %s, %s)
598 ON CONFLICT (network) DO NOTHING""",
599 "%s" % network,
600 block.get("country"),
601 block.get("is-anonymous-proxy") == "yes",
602 block.get("is-satellite-provider") == "yes",
603 block.get("is-anycast") == "yes",
604 )
605
606 elif type == "autnum":
607 autnum = block.get("autnum")
608
609 # Check if AS number begins with "AS"
610 if not autnum.startswith("AS"):
611 log.warning("Invalid AS number: %s" % autnum)
612 continue
613
614 # Strip "AS"
615 autnum = autnum[2:]
616
617 self.db.execute("""
618 INSERT INTO autnum_overrides(
619 number,
620 name,
621 is_anonymous_proxy,
622 is_satellite_provider,
623 is_anycast
624 ) VALUES(%s, %s, %s, %s, %s)
625 ON CONFLICT DO NOTHING""",
626 autnum, block.get("name"),
627 block.get("is-anonymous-proxy") == "yes",
628 block.get("is-satellite-provider") == "yes",
629 block.get("is-anycast") == "yes",
630 )
631
632 else:
633 log.warning("Unsupport type: %s" % type)
634
6ffd06b5
MT
635
def split_line(line):
    """
    Split a ``key: value`` line at the first colon.

    Returns a ``(key, value)`` tuple with surrounding whitespace removed
    from both parts. Without a colon, the whole line becomes the key and
    the value is an empty string.
    """
    key, _sep, val = line.partition(":")

    return key.strip(), val.strip()
78ff0cf2
MT
644
def main():
    """Run the command line interface; terminates the process via CLI.run()."""
    c = CLI()
    c.run()


# Only start the CLI when executed as a script, so that importing this
# module (e.g. for testing) does not parse sys.argv and exit
if __name__ == "__main__":
    main()