]> git.ipfire.org Git - people/ms/libloc.git/blame - src/python/location-importer.in
location-importer.in: Import (technical) AS names from ARIN
[people/ms/libloc.git] / src / python / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
1814283b 6# Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
6ffd06b5 21import ipaddress
78ff0cf2 22import logging
6ffd06b5
MT
23import math
24import re
22d8d199 25import socket
78ff0cf2 26import sys
83d61c46 27import telnetlib
78ff0cf2
MT
28
29# Load our location module
30import location
29c6fa22 31import location.database
3192b66c 32import location.importer
78ff0cf2
MT
33from location.i18n import _
34
# Initialise logging
# Module-wide logger used by every importer method below.
log = logging.getLogger("location.importer")
# Truthy value: records are also handed to the handlers of ancestor loggers.
log.propagate = 1
38
39class CLI(object):
def parse_cli(self):
    """
        Builds the argument parser, parses the command line and returns
        the resulting namespace.

        The log level is adjusted according to --debug/--quiet. If no
        sub-command was selected, the usage is printed and the process
        exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description=_("Location Importer Command Line Interface"),
    )
    commands = parser.add_subparsers()

    # Global configuration flags
    parser.add_argument("--debug", action="store_true",
        help=_("Enable debug output"))
    parser.add_argument("--quiet", action="store_true",
        help=_("Enable quiet mode"))

    # version
    parser.add_argument("--version", action="version",
        version="%(prog)s @VERSION@")

    # Database (all four options are mandatory)
    database_options = (
        ("--database-host", _("Database Hostname"), _("HOST")),
        ("--database-name", _("Database Name"), _("NAME")),
        ("--database-username", _("Database Username"), _("USERNAME")),
        ("--database-password", _("Database Password"), _("PASSWORD")),
    )
    for flag, text, placeholder in database_options:
        parser.add_argument(flag, required=True, help=text, metavar=placeholder)

    # Write Database
    write = commands.add_parser("write", help=_("Write database to file"))
    write.set_defaults(func=self.handle_write)
    write.add_argument("file", nargs=1, help=_("Database File"))
    write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
    write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
    write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
    write.add_argument("--description", nargs="?", help=_("Sets a description"))
    write.add_argument("--license", nargs="?", help=_("Sets the license"))
    write.add_argument("--version", type=int, help=_("Database Format Version"))

    # Update WHOIS
    update_whois = commands.add_parser("update-whois", help=_("Update WHOIS Information"))
    update_whois.set_defaults(func=self.handle_update_whois)

    # Update announcements
    update_announcements = commands.add_parser("update-announcements",
        help=_("Update BGP Annoucements"))
    update_announcements.set_defaults(func=self.handle_update_announcements)
    update_announcements.add_argument("server", nargs=1,
        help=_("Route Server to connect to"), metavar=_("SERVER"))

    # Update overrides
    update_overrides = commands.add_parser("update-overrides",
        help=_("Update overrides"),
    )
    update_overrides.add_argument(
        "files", nargs="+", help=_("Files to import"),
    )
    update_overrides.set_defaults(func=self.handle_update_overrides)

    # Import countries
    import_countries = commands.add_parser("import-countries",
        help=_("Import countries"),
    )
    import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
        help=_("File to import"))
    import_countries.set_defaults(func=self.handle_import_countries)

    args = parser.parse_args()

    # Configure logging (--debug wins over --quiet)
    if args.debug:
        location.logger.set_level(logging.DEBUG)
    elif args.quiet:
        location.logger.set_level(logging.WARNING)

    # Without a sub-command there is nothing to run, so show the usage
    if "func" not in args:
        parser.print_usage()
        sys.exit(2)

    return args
119
def run(self):
    """
        Entry point: parses the command line, connects to the database,
        runs the selected sub-command and terminates the process.

        The return value of the sub-command is used as exit code if it
        is truthy; otherwise the process exits with 0.
    """
    # Parse command line arguments
    args = self.parse_cli()

    # Initialise database
    self.db = self._setup_database(args)

    # Run the handler and turn its result into the exit code
    status = args.func(args)
    sys.exit(status or 0)
136
29c6fa22
MT
def _setup_database(self, ns):
    """
        Initialise the database

        Opens a connection using the --database-* values from the parsed
        command line namespace "ns", creates all tables, columns and
        indexes used by the importer if they do not exist yet, and
        returns the connection.
    """
    # Connect to database
    db = location.database.Connection(
        host=ns.database_host, database=ns.database_name,
        user=ns.database_username, password=ns.database_password,
    )

    # The whole schema setup runs in a single transaction.
    # NOTE(review): the "ADD COLUMN IF NOT EXISTS source text NOT NULL"
    # statements below have no DEFAULT; presumably they fail on an existing
    # table that already holds rows and lacks the column - confirm against
    # the PostgreSQL version in use.
    with db.transaction():
        db.execute("""
            -- announcements
            CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
                first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
                last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
            CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
            CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
            CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

            -- autnums
            CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
            ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text NOT NULL;
            CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

            -- countries
            CREATE TABLE IF NOT EXISTS countries(
                country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
            CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

            -- networks
            CREATE TABLE IF NOT EXISTS networks(network inet, country text);
            ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
            ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL;
            CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
            CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
            CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

            -- overrides
            CREATE TABLE IF NOT EXISTS autnum_overrides(
                number bigint NOT NULL,
                name text,
                country text,
                is_anonymous_proxy boolean,
                is_satellite_provider boolean,
                is_anycast boolean
            );
            CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
                ON autnum_overrides(number);
            ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;

            CREATE TABLE IF NOT EXISTS network_overrides(
                network inet NOT NULL,
                country text,
                is_anonymous_proxy boolean,
                is_satellite_provider boolean,
                is_anycast boolean
            );
            CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
                ON network_overrides(network);
            CREATE INDEX IF NOT EXISTS network_overrides_search
                ON network_overrides USING GIST(network inet_ops);
            ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
        """)

    return db
203
0983f3dd
MT
def handle_write(self, ns):
    """
        Compiles a database in libloc format out of what is in the database

        Autonomous Systems, networks (with country, ASN and flags, after
        applying overrides) and countries are read from SQL, fed into a
        location.Writer and written to every path given in ns.file.
    """
    # Allocate a writer
    writer = location.Writer(ns.signing_key, ns.backup_signing_key)

    # Copy over whatever metadata was given on the command line
    for field in ("vendor", "description", "license"):
        value = getattr(ns, field)
        if value:
            setattr(writer, field, value)

    # Add all Autonomous Systems
    log.info("Writing Autonomous Systems...")

    # Select all ASes with a name (an override name wins)
    autnums = self.db.query("""
        SELECT
            autnums.number AS number,
            COALESCE(
                (SELECT overrides.name FROM autnum_overrides overrides
                    WHERE overrides.number = autnums.number),
                autnums.name
            ) AS name
            FROM autnums
            WHERE name <> %s ORDER BY number
        """, "")

    for entry in autnums:
        autnum = writer.add_as(entry.number)
        autnum.name = entry.name

    # Add all networks
    log.info("Writing networks...")

    # Select all known networks
    networks = self.db.query("""
        -- Return a list of those networks enriched with all
        -- other information that we store in the database
        SELECT
            DISTINCT ON (network)
            network,
            autnum,

            -- Country
            COALESCE(
                (
                    SELECT country FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT country FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                networks.country
            ) AS country,

            -- Flags
            COALESCE(
                (
                    SELECT is_anonymous_proxy FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anonymous_proxy FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anonymous_proxy,
            COALESCE(
                (
                    SELECT is_satellite_provider FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_satellite_provider FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_satellite_provider,
            COALESCE(
                (
                    SELECT is_anycast FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anycast FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anycast,
            COALESCE(
                (
                    SELECT is_drop FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_drop FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_drop
        FROM (
            SELECT
                known_networks.network AS network,
                announcements.autnum AS autnum,
                networks.country AS country,

                -- Must be part of returned values for ORDER BY clause
                masklen(announcements.network) AS sort_a,
                masklen(networks.network) AS sort_b
            FROM (
                    SELECT network FROM announcements
                UNION ALL
                    SELECT network FROM networks
                UNION ALL
                    SELECT network FROM network_overrides
                ) known_networks
            LEFT JOIN
                announcements ON known_networks.network <<= announcements.network
            LEFT JOIN
                networks ON known_networks.network <<= networks.network
            ORDER BY
                known_networks.network,
                sort_a DESC,
                sort_b DESC
        ) networks
    """)

    # Result column -> name of the flag constant in the location module.
    # The constant is only looked up when the column is actually set.
    flag_columns = (
        ("is_anonymous_proxy", "NETWORK_FLAG_ANONYMOUS_PROXY"),
        ("is_satellite_provider", "NETWORK_FLAG_SATELLITE_PROVIDER"),
        ("is_anycast", "NETWORK_FLAG_ANYCAST"),
        ("is_drop", "NETWORK_FLAG_DROP"),
    )

    for entry in networks:
        network = writer.add_network(entry.network)

        # Save country
        if entry.country:
            network.country_code = entry.country

        # Save ASN
        if entry.autnum:
            network.asn = entry.autnum

        # Set flags
        for column, constant in flag_columns:
            if getattr(entry, column):
                network.set_flag(getattr(location, constant))

    # Add all countries
    log.info("Writing countries...")
    countries = self.db.query("SELECT * FROM countries ORDER BY country_code")

    for entry in countries:
        country = writer.add_country(entry.country_code)
        country.continent_code = entry.continent_code
        country.name = entry.name

    # Write everything to file
    log.info("Writing database to file...")
    for path in ns.file:
        writer.write(path)
385
6ffd06b5
MT
def handle_update_whois(self, ns):
    """
        Re-imports all WHOIS based data.

        Inside one transaction, every source listed in
        location.importer.WHOIS_SOURCES is downloaded block by block and
        parsed into temporary tables. The networks table is truncated and
        refilled from the parsed RIR data: the largest networks first, and
        smaller ones only if they have no parent or a different country
        than their closest parent. AS names are derived from the parsed
        aut-num and organisation objects.

        Afterwards, the "extended" sources are imported line by line (one
        transaction per URL) and finally the ARIN AS names file.

        "ns" is accepted for interface compatibility and not used.
    """
    downloader = location.importer.Downloader()

    # Download all sources
    with self.db.transaction():
        # Create some temporary tables to store parsed data
        self.db.execute("""
            CREATE TEMPORARY TABLE _autnums(number integer, organization text, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

            CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

            CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
            CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
        """)

        # Remove all previously imported content
        self.db.execute("""
            TRUNCATE TABLE networks;
        """)

        # Fetch all valid country codes to check parsed networks against...
        # (kept as a set, because it is consulted once per parsed object)
        rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
        validcountries = {row.country_code for row in rows}

        for source_key in location.importer.WHOIS_SOURCES:
            for single_url in location.importer.WHOIS_SOURCES[source_key]:
                with downloader.request(single_url, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block, source_key, validcountries)

        # Process all parsed networks from every RIR we happen to have access to,
        # insert the largest network chunks into the networks table immediately...
        families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")

        for family in (row.family for row in families):
            smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)

            self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
                SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)

            # ... determine any other prefixes for this network family, ...
            prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
                WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)

            # ... and insert networks with this prefix in case they provide additional
            # information (i. e. subnet of a larger chunk with a different country)
            for prefix in (row.prefix for row in prefixes):
                self.db.execute("""
                    WITH candidates AS (
                        SELECT
                            _rirdata.network,
                            _rirdata.country,
                            _rirdata.original_countries,
                            _rirdata.source
                        FROM
                            _rirdata
                        WHERE
                            family(_rirdata.network) = %s
                        AND
                            masklen(_rirdata.network) = %s
                    ),
                    filtered AS (
                        SELECT
                            DISTINCT ON (c.network)
                            c.network,
                            c.country,
                            c.original_countries,
                            c.source,
                            masklen(networks.network),
                            networks.country AS parent_country
                        FROM
                            candidates c
                        LEFT JOIN
                            networks
                        ON
                            c.network << networks.network
                        ORDER BY
                            c.network,
                            masklen(networks.network) DESC NULLS LAST
                    )
                    INSERT INTO
                        networks(network, country, original_countries, source)
                    SELECT
                        network,
                        country,
                        original_countries,
                        source
                    FROM
                        filtered
                    WHERE
                        parent_country IS NULL
                    OR
                        country <> parent_country
                    ON CONFLICT DO NOTHING""",
                    family, prefix,
                )

        # Resolve the organisation handle of every aut-num into a name
        self.db.execute("""
            INSERT INTO autnums(number, name, source)
                SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
                    JOIN _organizations ON _autnums.organization = _organizations.handle
            ON CONFLICT (number) DO UPDATE SET name = excluded.name;
        """)

    # Download all extended sources
    for source_key in location.importer.EXTENDED_SOURCES:
        for single_url in location.importer.EXTENDED_SOURCES[source_key]:
            with self.db.transaction():
                # Download data
                with downloader.request(single_url) as f:
                    for line in f:
                        self._parse_line(line, source_key, validcountries)

    # Download and import (technical) AS names from ARIN
    self._import_as_names_from_arin()
510
bd341642
PM
511 def _check_parsed_network(self, network):
512 """
513 Assistive function to detect and subsequently sort out parsed
514 networks from RIR data (both Whois and so-called "extended sources"),
515 which are or have...
516
517 (a) not globally routable (RFC 1918 space, et al.)
518 (b) covering a too large chunk of the IP address space (prefix length
519 is < 7 for IPv4 networks, and < 10 for IPv6)
520 (c) "0.0.0.0" or "::" as a network address
521 (d) are too small for being publicly announced (we have decided not to
522 process them at the moment, as they significantly enlarge our
523 database without providing very helpful additional information)
524
525 This unfortunately is necessary due to brain-dead clutter across
526 various RIR databases, causing mismatches and eventually disruptions.
527
528 We will return False in case a network is not suitable for adding
529 it to our database, and True otherwise.
530 """
531
532 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
533 return False
534
535 if not network.is_global:
2ba6ed07 536 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
537 return False
538
539 if network.version == 4:
540 if network.prefixlen < 7:
2ba6ed07 541 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
542 return False
543
544 if network.prefixlen > 24:
ebb087cf 545 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
546 return False
547
548 if str(network.network_address) == "0.0.0.0":
2ba6ed07 549 log.debug("Skipping network based on 0.0.0.0: %s" % network)
bd341642
PM
550 return False
551
552 elif network.version == 6:
553 if network.prefixlen < 10:
2ba6ed07 554 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
555 return False
556
557 if network.prefixlen > 48:
ebb087cf 558 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
559 return False
560
561 if str(network.network_address) == "::":
2ba6ed07 562 log.debug("Skipping network based on '::': %s" % network)
bd341642
PM
563 return False
564
565 else:
566 # This should not happen...
84187ab5 567 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
bd341642
PM
568 return False
569
570 # In case we have made it here, the network is considered to
571 # be suitable for libloc consumption...
572 return True
573
28b08385 574 def _parse_block(self, block, source_key, validcountries = None):
6ffd06b5
MT
575 # Get first line to find out what type of block this is
576 line = block[0]
577
6ffd06b5 578 # aut-num
429a43d1 579 if line.startswith("aut-num:"):
28b08385 580 return self._parse_autnum_block(block, source_key)
6ffd06b5 581
aadac4c5
PM
582 # inetnum
583 if line.startswith("inet6num:") or line.startswith("inetnum:"):
28b08385 584 return self._parse_inetnum_block(block, source_key, validcountries)
aadac4c5 585
6ffd06b5
MT
586 # organisation
587 elif line.startswith("organisation:"):
28b08385 588 return self._parse_org_block(block, source_key)
6ffd06b5 589
def _parse_autnum_block(self, block, source_key):
    """
        Parses an "aut-num" Whois block and stores the AS number together
        with the (upper-cased) handle of its organisation and the source
        in the temporary _autnums table.

        Blocks without a parseable AS number are skipped, because a row
        without a number could never be matched to anything later on.
    """
    autnum = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "aut-num":
            m = re.match(r"^(AS|as)(\d+)", val)
            if m:
                autnum["asn"] = m.group(2)

        elif key == "org":
            autnum[key] = val.upper()

    # Skip empty objects and objects without a usable AS number
    if "asn" not in autnum:
        return

    # Insert into database
    self.db.execute("INSERT INTO _autnums(number, organization, source) \
        VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
            organization = excluded.organization",
        autnum.get("asn"), autnum.get("org"), source_key,
    )
614
def _parse_inetnum_block(self, block, source_key, validcountries = None):
    """
        Parses an "inetnum"/"inet6num" Whois block and stores every
        suitable network of it in the temporary _rirdata table.

        block          -- the lines of the Whois object
        source_key     -- name of the source, stored with each network
        validcountries -- container of known country codes; if it is
                          empty or None, country codes are not checked

        The block is skipped if its netname marks it as historic/orphaned
        data, if the address (range) cannot be parsed, if it has no
        network or no country, or if one of its country codes is unknown.
        The first country listed becomes the country of the network; the
        complete (distinct) list is stored as original_countries.
    """
    log.debug("Parsing inetnum block:")

    inetnum = {}
    for line in block:
        log.debug(line)

        # Split line
        key, val = split_line(line)

        # Filter any inetnum records which are only referring to IP space
        # not managed by that specific RIR...
        if key == "netname":
            if re.match(r"(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK)", val.strip()):
                log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
                return

        if key == "inetnum":
            start_address, delim, end_address = val.partition("-")

            # Strip any excess space
            start_address, end_address = start_address.rstrip(), end_address.strip()

            # Convert the range into a list of networks. Apart from
            # unparseable addresses, summarize_address_range() rejects
            # ranges that end before they start or mix IP versions.
            try:
                start_address = ipaddress.ip_address(start_address)
                end_address = ipaddress.ip_address(end_address)

                inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
            except (ValueError, TypeError):
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "inet6num":
            try:
                inetnum[key] = [ipaddress.ip_network(val, strict=False)]
            except ValueError:
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "country":
            val = val.upper()

            # Catch RIR data objects with more than one country code,
            # but keep this list distinct...
            countries = inetnum.setdefault(key, [])
            if val not in countries:
                countries.append(val)

    networks = inetnum.get("inet6num") or inetnum.get("inetnum")
    countries = inetnum.get("country")

    # Skip empty objects and objects lacking either networks or a country
    if not networks or not countries:
        return

    # Prepare skipping objects with unknown country codes...
    if validcountries:
        invalidcountries = [country for country in countries if country not in validcountries]
    else:
        invalidcountries = []

    # Iterate through all networks enumerated from above, check them for plausibility and insert
    # them into the database, if _check_parsed_network() succeeded
    for single_network in networks:
        if not self._check_parsed_network(single_network):
            continue

        # Skip objects with unknown country codes. We only complain if the
        # network itself is valid (to avoid log spam), and only once.
        if invalidcountries:
            log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
                (invalidcountries, countries, networks))
            break

        # Everything is fine here, run INSERT statement...
        self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
            VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
            "%s" % single_network, countries[0], countries, source_key,
        )
aadac4c5 686
def _parse_org_block(self, block, source_key):
    """
        Parses an "organisation" Whois block and stores the (upper-cased)
        handle together with the organisation name and the source in the
        temporary _organizations table.

        Objects lacking either the handle or the name are skipped: the
        name column is declared NOT NULL, and an organisation without a
        handle cannot be referenced by any aut-num object.
    """
    org = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "organisation":
            org[key] = val.upper()
        elif key == "org-name":
            org[key] = val

    # Skip empty and incomplete objects
    if not org.get("organisation") or not org.get("org-name"):
        return

    self.db.execute("INSERT INTO _organizations(handle, name, source) \
        VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
        UPDATE SET name = excluded.name",
        org.get("organisation"), org.get("org-name"), source_key,
    )
707
28b08385 708 def _parse_line(self, line, source_key, validcountries = None):
429a43d1
MT
709 # Skip version line
710 if line.startswith("2"):
711 return
6ffd06b5 712
429a43d1
MT
713 # Skip comments
714 if line.startswith("#"):
715 return
6ffd06b5 716
429a43d1
MT
717 try:
718 registry, country_code, type, line = line.split("|", 3)
719 except:
720 log.warning("Could not parse line: %s" % line)
721 return
6ffd06b5 722
84b175e2
PM
723 # Skip any lines that are for stats only or do not have a country
724 # code at all (avoids log spam below)
725 if not country_code or country_code == '*':
726 return
727
728 # Skip objects with unknown country codes
729 if validcountries and country_code not in validcountries:
730 log.warning("Skipping line with bogus country '%s': %s" % \
731 (country_code, line))
6ffd06b5
MT
732 return
733
429a43d1 734 if type in ("ipv6", "ipv4"):
28b08385 735 return self._parse_ip_line(country_code, type, line, source_key)
429a43d1 736
28b08385 737 def _parse_ip_line(self, country, type, line, source_key):
429a43d1
MT
738 try:
739 address, prefix, date, status, organization = line.split("|")
740 except ValueError:
741 organization = None
742
743 # Try parsing the line without organization
744 try:
745 address, prefix, date, status = line.split("|")
746 except ValueError:
747 log.warning("Unhandled line format: %s" % line)
748 return
749
750 # Skip anything that isn't properly assigned
751 if not status in ("assigned", "allocated"):
752 return
753
754 # Cast prefix into an integer
755 try:
756 prefix = int(prefix)
757 except:
758 log.warning("Invalid prefix: %s" % prefix)
7177031f 759 return
429a43d1
MT
760
761 # Fix prefix length for IPv4
762 if type == "ipv4":
763 prefix = 32 - int(math.log(prefix, 2))
764
765 # Try to parse the address
766 try:
767 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
768 except ValueError:
769 log.warning("Invalid IP address: %s" % address)
770 return
771
bd341642
PM
772 if not self._check_parsed_network(network):
773 return
774
b6b2b331
PM
775 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
776 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
87b3e102 777 UPDATE SET country = excluded.country",
b6b2b331 778 "%s" % network, country, [country], source_key,
6ffd06b5
MT
779 )
780
92403f39
PM
def _import_as_names_from_arin(self):
    """
        Downloads the AS names file from ARIN and adds a name for every
        Autonomous System we do not have a name for yet.

        Only lines starting with a space and carrying at least an AS
        number and a name are considered. Lines with a non-numeric,
        private or reserved ASN, with a placeholder name for another
        RIR, or with a name made of anything but letters, digits, "-"
        and "_" are skipped. Existing names are never overwritten.
    """
    downloader = location.importer.Downloader()

    # Names that are placeholders for a different RIR or entity
    placeholder = re.compile(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(\d?$|\-.*)")

    # Anything we do not expect in an AS name ("-" goes last, so that
    # it is taken literally instead of forming a range)
    unexpected = re.compile(r"[^a-zA-Z0-9_-]")

    # XXX: Download AS names file from ARIN (note that these names appear to be quite
    # technical, not intended for human consumption, as description fields in
    # organisation handles for other RIRs are - however, this is what we have got,
    # and in some cases, it might be still better than nothing)
    with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
        for line in f:
            # Convert binary line to string...
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="replace")
            else:
                line = str(line)

            # ... valid lines start with a space, followed by the number of the Autonomous System ...
            if not line.startswith(" "):
                continue

            # Split line and check if there is a valid ASN in it...
            fields = line.split()
            if len(fields) < 2:
                continue

            asn, name = fields[:2]

            try:
                asn = int(asn)
            except ValueError:
                log.debug("Skipping ARIN AS names line not containing an integer for ASN")
                continue

            if not (1 <= asn <= 23455 or 23457 <= asn <= 64495 or 131072 <= asn <= 4199999999):
                log.debug("Skipping ARIN AS names line not containing a valid ASN: %s" % asn)
                continue

            # Skip any AS name that appears to be a placeholder for a different RIR or entity...
            if placeholder.match(name):
                continue

            # Bail out in case the AS name contains anything we do not expect here...
            if unexpected.search(name):
                log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
                    (asn, name))
                continue

            # Things look good here, run INSERT statement and skip this one if we already have
            # a (better?) name for this Autonomous System...
            self.db.execute("""
                INSERT INTO autnums(
                    number,
                    name,
                    source
                ) VALUES (%s, %s, %s)
                ON CONFLICT (number) DO NOTHING""",
                asn,
                name,
                "ARIN",
            )
832
83d61c46
MT
def handle_update_announcements(self, ns):
    """
        Updates the announcements table from a route server and purges
        everything from it that we never want to keep.

        ns.server[0] is the server to query: a value starting with "/"
        is handed to the Bird importer, anything else to the telnet
        importer. Import and clean-up run in one transaction.
    """
    server = ns.server[0]

    with self.db.transaction():
        # A leading slash selects the Bird importer
        # (presumably the path of Bird's control socket - confirm)
        if server.startswith("/"):
            self._handle_update_announcements_from_bird(server)
        else:
            self._handle_update_announcements_from_telnet(server)

        # Purge anything we never want here
        self.db.execute("""
            -- Delete default routes
            DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

            -- Delete anything that is not global unicast address space
            DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

            -- DELETE "current network" address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

            -- DELETE local loopback address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

            -- DELETE RFC 1918 address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

            -- DELETE test, benchmark and documentation address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

            -- DELETE CGNAT address space (RFC 6598)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

            -- DELETE link local address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

            -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
            DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

            -- DELETE multicast and reserved address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

            -- Delete networks that are too small to be in the global routing table
            DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
            DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

            -- Delete any non-public or reserved ASNs
            DELETE FROM announcements WHERE NOT (
                (autnum >= 1 AND autnum <= 23455)
                OR
                (autnum >= 23457 AND autnum <= 64495)
                OR
                (autnum >= 131072 AND autnum <= 4199999999)
            );

            -- Delete everything that we have not seen for 14 days
            DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
        """)
898
def _handle_update_announcements_from_bird(self, server):
    """
    Import the routing table of a Bird instance into the announcements table.

    server is passed on to _bird_cmd() as the path of the control socket.
    Every line returned for "show route" is matched against a regular
    expression that extracts the network and the number following "AS"
    in the trailing "[AS...]" field. Lines that do not match are logged
    at debug level and skipped. Matching routes are inserted, or updated
    (autnum and last_seen_at) if the network is already known.
    """
    # Pre-compile the regular expression for faster searching.
    # This has to be a raw literal so that "\s" and "\[" are passed to the
    # regex engine instead of being parsed as (invalid) escape sequences.
    route = re.compile(rb"^\s(.+?)\s+.+?\[AS(.*?).\]$")

    log.info("Requesting routing table from Bird (%s)", server)

    # Send command to list all routes
    for line in self._bird_cmd(server, "show route"):
        m = route.match(line)
        if not m:
            # Never let an undecodable line abort the import just for logging
            log.debug("Could not parse line: %s", line.decode(errors="replace"))
            continue

        # Fetch the extracted network and ASN
        network, autnum = m.groups()

        # Insert it into the database
        self.db.execute("INSERT INTO announcements(network, autnum) \
            VALUES(%s, %s) ON CONFLICT (network) DO \
            UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
            network.decode(), autnum.decode(),
        )
921
def _handle_update_announcements_from_telnet(self, server):
    """
    Import the BGP tables of a router reachable via telnet.

    Opens a telnet session to server, waits up to 30 seconds for a "> "
    prompt, disables pagination and then requests the IPv6 and the IPv4
    unicast tables with "show bgp <protocol> unicast". Entries are read
    until a read times out after 5 seconds without any data. Every
    entry that matches the route pattern is inserted into the
    announcements table, or updated if the network is already known.

    Returns 1 if no prompt was received, otherwise None.
    """
    # Pre-compile regular expression for routes.
    # Raw literal: "\*", "\s", "\>" and "\d" are regex escapes and must not
    # be parsed as (invalid) escape sequences of the bytes literal.
    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

    with telnetlib.Telnet(server) as t:
        # Wait for console greeting
        greeting = t.read_until(b"> ", timeout=30)
        if not greeting:
            log.error("Could not get a console prompt")
            return 1

        # Disable pagination
        t.write(b"terminal length 0\n")

        # Wait for the prompt to return
        t.read_until(b"> ")

        # Fetch the routing tables
        for protocol in ("ipv6", "ipv4"):
            log.info("Requesting %s routing table" % protocol)

            # Request the full unicast routing table
            t.write(b"show bgp %s unicast\n" % protocol.encode())

            # Read entire header which ends with "Path"
            t.read_until(b"Path\r\n")

            while True:
                # Try reading a full entry
                # Those might be broken across multiple lines but ends with i
                line = t.read_until(b"i\r\n", timeout=5)
                if not line:
                    break

                # Try finding a route in here
                m = route.match(line)
                if not m:
                    continue

                network, autnum = m.groups()

                # Convert network to string
                network = network.decode()

                # Append /24 for IPv4 addresses
                # NOTE(review): this assumes that a missing prefix length
                # always means /24 - confirm against the router's output
                if "/" not in network and ":" not in network:
                    network = "%s/24" % network

                # Convert AS number to integer
                autnum = int(autnum)

                log.info("Found announcement for %s by %s" % (network, autnum))

                self.db.execute("INSERT INTO announcements(network, autnum) \
                    VALUES(%s, %s) ON CONFLICT (network) DO \
                    UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                    network, autnum,
                )

            log.info("Finished reading the %s routing table" % protocol)
1d4e4e8f 987
22d8d199
MT
988 def _bird_cmd(self, socket_path, command):
989 # Connect to the socket
990 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
991 s.connect(socket_path)
cedee656 992
22d8d199
MT
993 # Allocate some buffer
994 buffer = b""
83d61c46 995
22d8d199
MT
996 # Send the command
997 s.send(b"%s\n" % command.encode())
209c04b6 998
22d8d199
MT
999 while True:
1000 # Fill up the buffer
1001 buffer += s.recv(4096)
209c04b6 1002
22d8d199
MT
1003 while True:
1004 # Search for the next newline
1005 pos = buffer.find(b"\n")
209c04b6 1006
22d8d199
MT
1007 # If we cannot find one, we go back and read more data
1008 if pos <= 0:
1009 break
209c04b6 1010
22d8d199
MT
1011 # Cut after the newline character
1012 pos += 1
b89cee80 1013
22d8d199
MT
1014 # Split the line we want and keep the rest in buffer
1015 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1016
22d8d199
MT
1017 # Look for the end-of-output indicator
1018 if line == b"0000 \n":
1019 return
83d61c46 1020
22d8d199
MT
1021 # Otherwise return the line
1022 yield line
83d61c46 1023
d7fc3057
MT
def handle_update_overrides(self, ns):
    """
    Replace all override data with the content of the given files.

    Both override tables are truncated first. Every file in ns.files is
    then read block by block: "net" blocks go into network_overrides and
    "aut-num" blocks into autnum_overrides. Blocks with an invalid
    network, a network with a prefix length of zero, an AS number that
    does not begin with "AS", or an unknown type are logged and skipped.
    Everything runs inside a single database transaction.
    """
    # Keys of the boolean flags in the order of the SQL placeholders
    flag_keys = ("is-anonymous-proxy", "is-satellite-provider", "is-anycast", "drop")

    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("""
            TRUNCATE TABLE autnum_overrides;
            TRUNCATE TABLE network_overrides;
        """)

        for path in ns.files:
            log.info("Reading %s..." % path)

            with open(path, "rb") as f:
                for kind, block in location.importer.read_blocks(f):
                    if kind == "net":
                        raw = block.get("net")

                        # Try to parse and normalise the network
                        try:
                            net = ipaddress.ip_network(raw, strict=False)
                        except ValueError as e:
                            log.warning("Invalid IP network: %s: %s" % (raw, e))
                            continue

                        # Prevent that we overwrite all networks
                        if net.prefixlen == 0:
                            log.warning("Skipping %s: You cannot overwrite default" % net)
                            continue

                        self.db.execute("""
                            INSERT INTO network_overrides(
                                network,
                                country,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast,
                                is_drop
                            ) VALUES (%s, %s, %s, %s, %s, %s)
                            ON CONFLICT (network) DO NOTHING""",
                            str(net),
                            block.get("country"),
                            *(self._parse_bool(block, key) for key in flag_keys),
                        )

                    elif kind == "aut-num":
                        asn = block.get("aut-num")

                        # Check if AS number begins with "AS"
                        if not asn.startswith("AS"):
                            log.warning("Invalid AS number: %s" % asn)
                            continue

                        self.db.execute("""
                            INSERT INTO autnum_overrides(
                                number,
                                name,
                                country,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast,
                                is_drop
                            ) VALUES(%s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT DO NOTHING""",
                            # Strip "AS"
                            asn[2:],
                            block.get("name"),
                            block.get("country"),
                            *(self._parse_bool(block, key) for key in flag_keys),
                        )

                    else:
                        log.warning("Unsupported type: %s" % kind)
d7fc3057 1102
28d29b7c
MT
1103 @staticmethod
1104 def _parse_bool(block, key):
1105 val = block.get(key)
1106
1107 # There is no point to proceed when we got None
1108 if val is None:
1109 return
1110
1111 # Convert to lowercase
1112 val = val.lower()
1113
1114 # True
1115 if val in ("yes", "1"):
1116 return True
1117
1118 # False
1119 if val in ("no", "0"):
1120 return False
1121
1122 # Default to None
1123 return None
1124
8084b33a
MT
def handle_import_countries(self, ns):
    """
    Replace the content of the countries table with the given files.

    Every object in ns.file is iterated line by line. Each line is
    expected to hold a country code, a continent code and a name,
    separated by whitespace (the name may contain whitespace itself).
    Blank lines and lines starting with "#" are ignored; lines with
    fewer than three fields are logged and skipped. Everything runs
    inside a single database transaction.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("TRUNCATE TABLE countries")

        for f in ns.file:
            for line in f:
                line = line.rstrip()

                # Ignore blank lines and any comments
                if not line or line.startswith("#"):
                    continue

                # Unpacking raises ValueError if there are not enough fields
                try:
                    country_code, continent_code, name = line.split(maxsplit=2)
                except ValueError:
                    log.warning("Could not parse line: %s" % line)
                    continue

                self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
                    VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1146
6ffd06b5
MT
1147
def split_line(line):
    """
    Split line at its first colon into a (key, value) tuple.

    Surrounding whitespace is removed from both parts. If the line has
    no colon, the value is an empty string.
    """
    name, _, rest = line.partition(":")

    # Strip any excess space
    return name.strip(), rest.strip()
78ff0cf2
MT
1156
def main():
    """Create the command line interface and run it."""
    CLI().run()

# This file is executed as a script, so start right away
main()