]> git.ipfire.org Git - location/libloc.git/blame - src/scripts/location-importer.in
importer: Tolerate that data might exist from other RIRs
[location/libloc.git] / src / scripts / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
2ad36187 6# Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
6ffd06b5 21import ipaddress
dcef2ba4 22import json
78ff0cf2 23import logging
6ffd06b5
MT
24import math
25import re
22d8d199 26import socket
78ff0cf2 27import sys
83d61c46 28import telnetlib
3ce28dea 29import urllib.error
78ff0cf2
MT
30
31# Load our location module
32import location
29c6fa22 33import location.database
3192b66c 34import location.importer
78ff0cf2
MT
35from location.i18n import _
36
37# Initialise logging
38log = logging.getLogger("location.importer")
39log.propagate = 1
40
43fe570c
PM
41# Define constants
42VALID_ASN_RANGES = (
43 (1, 23455),
44 (23457, 64495),
45 (131072, 4199999999),
46)
47
48
78ff0cf2
MT
49class CLI(object):
50 def parse_cli(self):
51 parser = argparse.ArgumentParser(
52 description=_("Location Importer Command Line Interface"),
53 )
6ffd06b5 54 subparsers = parser.add_subparsers()
78ff0cf2
MT
55
56 # Global configuration flags
57 parser.add_argument("--debug", action="store_true",
58 help=_("Enable debug output"))
bc1f5f53
MT
59 parser.add_argument("--quiet", action="store_true",
60 help=_("Enable quiet mode"))
78ff0cf2
MT
61
62 # version
63 parser.add_argument("--version", action="version",
64 version="%(prog)s @VERSION@")
65
29c6fa22
MT
66 # Database
67 parser.add_argument("--database-host", required=True,
68 help=_("Database Hostname"), metavar=_("HOST"))
69 parser.add_argument("--database-name", required=True,
70 help=_("Database Name"), metavar=_("NAME"))
71 parser.add_argument("--database-username", required=True,
72 help=_("Database Username"), metavar=_("USERNAME"))
73 parser.add_argument("--database-password", required=True,
74 help=_("Database Password"), metavar=_("PASSWORD"))
75
0983f3dd
MT
76 # Write Database
77 write = subparsers.add_parser("write", help=_("Write database to file"))
78 write.set_defaults(func=self.handle_write)
79 write.add_argument("file", nargs=1, help=_("Database File"))
80 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
1164d876 81 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
0983f3dd
MT
82 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
83 write.add_argument("--description", nargs="?", help=_("Sets a description"))
84 write.add_argument("--license", nargs="?", help=_("Sets the license"))
b904896a 85 write.add_argument("--version", type=int, help=_("Database Format Version"))
0983f3dd 86
6ffd06b5
MT
87 # Update WHOIS
88 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
89 update_whois.set_defaults(func=self.handle_update_whois)
90
83d61c46
MT
91 # Update announcements
92 update_announcements = subparsers.add_parser("update-announcements",
93 help=_("Update BGP Annoucements"))
94 update_announcements.set_defaults(func=self.handle_update_announcements)
95 update_announcements.add_argument("server", nargs=1,
96 help=_("Route Server to connect to"), metavar=_("SERVER"))
97
d7fc3057
MT
98 # Update overrides
99 update_overrides = subparsers.add_parser("update-overrides",
100 help=_("Update overrides"),
101 )
102 update_overrides.add_argument(
103 "files", nargs="+", help=_("Files to import"),
104 )
105 update_overrides.set_defaults(func=self.handle_update_overrides)
106
8084b33a
MT
107 # Import countries
108 import_countries = subparsers.add_parser("import-countries",
109 help=_("Import countries"),
110 )
111 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
112 help=_("File to import"))
113 import_countries.set_defaults(func=self.handle_import_countries)
114
78ff0cf2
MT
115 args = parser.parse_args()
116
bc1f5f53 117 # Configure logging
78ff0cf2 118 if args.debug:
f9de5e61 119 location.logger.set_level(logging.DEBUG)
bc1f5f53
MT
120 elif args.quiet:
121 location.logger.set_level(logging.WARNING)
78ff0cf2 122
6ffd06b5
MT
123 # Print usage if no action was given
124 if not "func" in args:
125 parser.print_usage()
126 sys.exit(2)
127
78ff0cf2
MT
128 return args
129
130 def run(self):
131 # Parse command line arguments
132 args = self.parse_cli()
133
29c6fa22 134 # Initialise database
6ffd06b5 135 self.db = self._setup_database(args)
29c6fa22 136
78ff0cf2 137 # Call function
6ffd06b5 138 ret = args.func(args)
78ff0cf2
MT
139
140 # Return with exit code
141 if ret:
142 sys.exit(ret)
143
144 # Otherwise just exit
145 sys.exit(0)
146
29c6fa22
MT
147 def _setup_database(self, ns):
148 """
149 Initialise the database
150 """
151 # Connect to database
152 db = location.database.Connection(
153 host=ns.database_host, database=ns.database_name,
154 user=ns.database_username, password=ns.database_password,
155 )
156
157 with db.transaction():
158 db.execute("""
83d61c46
MT
159 -- announcements
160 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
161 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
162 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
163 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
164 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
a1707d89 165 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
83d61c46 166
6ffd06b5 167 -- autnums
0983f3dd 168 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
26f06e70 169 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
6ffd06b5
MT
170 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
171
8084b33a
MT
172 -- countries
173 CREATE TABLE IF NOT EXISTS countries(
174 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
175 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
176
429a43d1 177 -- networks
83d61c46 178 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
b6b2b331 179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
26f06e70 180 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
429a43d1 181 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
002deb6b 182 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
83d61c46 183 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
d7fc3057
MT
184
185 -- overrides
186 CREATE TABLE IF NOT EXISTS autnum_overrides(
187 number bigint NOT NULL,
188 name text,
bd1aa6a1 189 country text,
b8e25b71
MT
190 is_anonymous_proxy boolean,
191 is_satellite_provider boolean,
192 is_anycast boolean
d7fc3057
MT
193 );
194 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
195 ON autnum_overrides(number);
39ee3120 196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
e17e804e 197 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
d7fc3057
MT
198
199 CREATE TABLE IF NOT EXISTS network_overrides(
200 network inet NOT NULL,
201 country text,
b8e25b71
MT
202 is_anonymous_proxy boolean,
203 is_satellite_provider boolean,
204 is_anycast boolean
d7fc3057
MT
205 );
206 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
207 ON network_overrides(network);
991baf53
MT
208 CREATE INDEX IF NOT EXISTS network_overrides_search
209 ON network_overrides USING GIST(network inet_ops);
39ee3120 210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
e17e804e 211 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
29c6fa22
MT
212 """)
213
214 return db
215
0983f3dd
MT
216 def handle_write(self, ns):
217 """
218 Compiles a database in libloc format out of what is in the database
219 """
0983f3dd 220 # Allocate a writer
1164d876 221 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
0983f3dd
MT
222
223 # Set all metadata
224 if ns.vendor:
225 writer.vendor = ns.vendor
226
227 if ns.description:
228 writer.description = ns.description
229
230 if ns.license:
231 writer.license = ns.license
232
233 # Add all Autonomous Systems
234 log.info("Writing Autonomous Systems...")
235
236 # Select all ASes with a name
6e97c44b
MT
237 rows = self.db.query("""
238 SELECT
239 autnums.number AS number,
240 COALESCE(
241 (SELECT overrides.name FROM autnum_overrides overrides
242 WHERE overrides.number = autnums.number),
243 autnums.name
244 ) AS name
245 FROM autnums
246 WHERE name <> %s ORDER BY number
247 """, "")
0983f3dd
MT
248
249 for row in rows:
250 a = writer.add_as(row.number)
251 a.name = row.name
252
253 # Add all networks
254 log.info("Writing networks...")
255
256 # Select all known networks
257 rows = self.db.query("""
295a7774
MT
258 WITH known_networks AS (
259 SELECT network FROM announcements
260 UNION
261 SELECT network FROM networks
262 UNION
263 SELECT network FROM network_overrides
264 ),
265
266 ordered_networks AS (
267 SELECT
268 known_networks.network AS network,
269 announcements.autnum AS autnum,
270 networks.country AS country,
271
272 -- Must be part of returned values for ORDER BY clause
273 masklen(announcements.network) AS sort_a,
274 masklen(networks.network) AS sort_b
275 FROM
276 known_networks
277 LEFT JOIN
278 announcements ON known_networks.network <<= announcements.network
279 LEFT JOIN
280 networks ON known_networks.network <<= networks.network
281 ORDER BY
282 known_networks.network,
283 sort_a DESC,
284 sort_b DESC
285 )
286
5372d9c7
MT
287 -- Return a list of those networks enriched with all
288 -- other information that we store in the database
0983f3dd 289 SELECT
bbea93a7
MT
290 DISTINCT ON (network)
291 network,
292 autnum,
bd1aa6a1
MT
293
294 -- Country
295 COALESCE(
296 (
297 SELECT country FROM network_overrides overrides
bbea93a7 298 WHERE networks.network <<= overrides.network
bd1aa6a1
MT
299 ORDER BY masklen(overrides.network) DESC
300 LIMIT 1
301 ),
302 (
303 SELECT country FROM autnum_overrides overrides
bbea93a7 304 WHERE networks.autnum = overrides.number
bd1aa6a1
MT
305 ),
306 networks.country
307 ) AS country,
8e8555bb 308
0983f3dd 309 -- Flags
1422b5d4
MT
310 COALESCE(
311 (
312 SELECT is_anonymous_proxy FROM network_overrides overrides
bbea93a7 313 WHERE networks.network <<= overrides.network
1422b5d4
MT
314 ORDER BY masklen(overrides.network) DESC
315 LIMIT 1
316 ),
317 (
318 SELECT is_anonymous_proxy FROM autnum_overrides overrides
bbea93a7 319 WHERE networks.autnum = overrides.number
b8e25b71
MT
320 ),
321 FALSE
1422b5d4
MT
322 ) AS is_anonymous_proxy,
323 COALESCE(
324 (
325 SELECT is_satellite_provider FROM network_overrides overrides
bbea93a7 326 WHERE networks.network <<= overrides.network
1422b5d4
MT
327 ORDER BY masklen(overrides.network) DESC
328 LIMIT 1
329 ),
330 (
331 SELECT is_satellite_provider FROM autnum_overrides overrides
bbea93a7 332 WHERE networks.autnum = overrides.number
b8e25b71
MT
333 ),
334 FALSE
1422b5d4
MT
335 ) AS is_satellite_provider,
336 COALESCE(
337 (
338 SELECT is_anycast FROM network_overrides overrides
bbea93a7 339 WHERE networks.network <<= overrides.network
1422b5d4
MT
340 ORDER BY masklen(overrides.network) DESC
341 LIMIT 1
342 ),
343 (
344 SELECT is_anycast FROM autnum_overrides overrides
bbea93a7 345 WHERE networks.autnum = overrides.number
b8e25b71
MT
346 ),
347 FALSE
e17e804e
PM
348 ) AS is_anycast,
349 COALESCE(
350 (
351 SELECT is_drop FROM network_overrides overrides
352 WHERE networks.network <<= overrides.network
353 ORDER BY masklen(overrides.network) DESC
354 LIMIT 1
355 ),
356 (
357 SELECT is_drop FROM autnum_overrides overrides
358 WHERE networks.autnum = overrides.number
359 ),
360 FALSE
361 ) AS is_drop
295a7774
MT
362 FROM
363 ordered_networks networks
0983f3dd
MT
364 """)
365
366 for row in rows:
367 network = writer.add_network(row.network)
368
5372d9c7
MT
369 # Save country
370 if row.country:
371 network.country_code = row.country
372
373 # Save ASN
374 if row.autnum:
375 network.asn = row.autnum
0983f3dd
MT
376
377 # Set flags
378 if row.is_anonymous_proxy:
379 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
380
381 if row.is_satellite_provider:
382 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
383
384 if row.is_anycast:
385 network.set_flag(location.NETWORK_FLAG_ANYCAST)
386
e17e804e
PM
387 if row.is_drop:
388 network.set_flag(location.NETWORK_FLAG_DROP)
389
8084b33a
MT
390 # Add all countries
391 log.info("Writing countries...")
392 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
393
394 for row in rows:
395 c = writer.add_country(row.country_code)
396 c.continent_code = row.continent_code
397 c.name = row.name
398
0983f3dd
MT
399 # Write everything to file
400 log.info("Writing database to file...")
401 for file in ns.file:
402 writer.write(file)
403
6ffd06b5
MT
404 def handle_update_whois(self, ns):
405 downloader = location.importer.Downloader()
406
3ce28dea
MT
407 # Did we run successfully?
408 error = False
84b175e2 409
3ce28dea
MT
410 # Fetch all valid country codes to check parsed networks aganist
411 validcountries = self.countries
6ffd06b5 412
3ce28dea
MT
413 # Iterate over all potential sources
414 for source in location.importer.SOURCES:
415 with self.db.transaction():
416 # Create some temporary tables to store parsed data
417 self.db.execute("""
418 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
419 organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
420 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
421
422 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
423 name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
424 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
425
426 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
427 original_countries text[] NOT NULL, source text NOT NULL)
428 ON COMMIT DROP;
429 CREATE INDEX _rirdata_search ON _rirdata
430 USING BTREE(family(network), masklen(network));
431 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
432 """)
002deb6b 433
3ce28dea
MT
434 # Remove all previously imported content
435 self.db.execute("DELETE FROM networks WHERE source = %s", source)
002deb6b 436
3ce28dea
MT
437 try:
438 # Fetch WHOIS sources
439 for url in location.importer.WHOIS_SOURCES.get(source, []):
440 for block in downloader.request_blocks(url):
441 self._parse_block(block, source, validcountries)
442
443 # Fetch extended sources
444 for url in location.importer.EXTENDED_SOURCES.get(source, []):
445 for line in downloader.request_lines(url):
446 self._parse_line(line, source, validcountries)
447 except urllib.error.URLError as e:
e76b8204 448 log.error("Could not retrieve data from %s: %s" % (source, e))
3ce28dea
MT
449 error = True
450
451 # Continue with the next source
452 continue
002deb6b 453
3ce28dea
MT
454 # Process all parsed networks from every RIR we happen to have access to,
455 # insert the largest network chunks into the networks table immediately...
456 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata \
457 ORDER BY family(network)")
458
459 for family in (row.family for row in families):
5bfa1bb4
MT
460 # Fetch the smallest mask length in our data set
461 smallest = self.db.get("""
462 SELECT
463 MIN(
464 masklen(network)
465 ) AS prefix
466 FROM
467 _rirdata
468 WHERE
469 family(network) = %s""",
470 family,
471 )
472
473 # Copy all networks
474 self.db.execute("""
475 INSERT INTO
476 networks
477 (
478 network,
479 country,
480 original_countries,
481 source
482 )
483 SELECT
484 network,
485 country,
486 original_countries,
487 source
488 FROM
489 _rirdata
490 WHERE
491 masklen(network) = %s
492 AND
493 family(network) = %s
494 ON CONFLICT DO
495 NOTHING""",
496 smallest.prefix,
497 family,
498 )
3ce28dea
MT
499
500 # ... determine any other prefixes for this network family, ...
5bfa1bb4
MT
501 prefixes = self.db.query("""
502 SELECT
503 DISTINCT masklen(network) AS prefix
504 FROM
505 _rirdata
506 WHERE
507 family(network) = %s
508 ORDER BY
509 masklen(network) ASC
510 OFFSET 1""",
511 family,
512 )
3ce28dea
MT
513
514 # ... and insert networks with this prefix in case they provide additional
515 # information (i. e. subnet of a larger chunk with a different country)
516 for prefix in (row.prefix for row in prefixes):
517 self.db.execute("""
518 WITH candidates AS (
519 SELECT
520 _rirdata.network,
521 _rirdata.country,
522 _rirdata.original_countries,
523 _rirdata.source
524 FROM
525 _rirdata
526 WHERE
527 family(_rirdata.network) = %s
528 AND
529 masklen(_rirdata.network) = %s
530 ),
531 filtered AS (
532 SELECT
533 DISTINCT ON (c.network)
534 c.network,
535 c.country,
536 c.original_countries,
537 c.source,
538 masklen(networks.network),
539 networks.country AS parent_country
540 FROM
541 candidates c
542 LEFT JOIN
543 networks
544 ON
545 c.network << networks.network
546 ORDER BY
547 c.network,
548 masklen(networks.network) DESC NULLS LAST
549 )
550 INSERT INTO
551 networks(network, country, original_countries, source)
002deb6b 552 SELECT
3ce28dea
MT
553 network,
554 country,
555 original_countries,
556 source
002deb6b 557 FROM
3ce28dea 558 filtered
002deb6b 559 WHERE
3ce28dea
MT
560 parent_country IS NULL
561 OR
562 country <> parent_country
563 ON CONFLICT DO NOTHING""",
564 family, prefix,
002deb6b 565 )
0365119d 566
3ce28dea
MT
567 self.db.execute("""
568 INSERT INTO autnums(number, name, source)
569 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
570 JOIN _organizations ON _autnums.organization = _organizations.handle
571 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
572 """)
429a43d1 573
3ce28dea
MT
574 # Download and import (technical) AS names from ARIN
575 with self.db.transaction():
671e149e 576 self._import_as_names_from_arin()
92403f39 577
3ce28dea
MT
578 # Return a non-zero exit code for errors
579 return 1 if error else 0
580
bd341642
PM
581 def _check_parsed_network(self, network):
582 """
583 Assistive function to detect and subsequently sort out parsed
584 networks from RIR data (both Whois and so-called "extended sources"),
585 which are or have...
586
587 (a) not globally routable (RFC 1918 space, et al.)
588 (b) covering a too large chunk of the IP address space (prefix length
589 is < 7 for IPv4 networks, and < 10 for IPv6)
590 (c) "0.0.0.0" or "::" as a network address
591 (d) are too small for being publicly announced (we have decided not to
592 process them at the moment, as they significantly enlarge our
593 database without providing very helpful additional information)
594
595 This unfortunately is necessary due to brain-dead clutter across
596 various RIR databases, causing mismatches and eventually disruptions.
597
598 We will return False in case a network is not suitable for adding
599 it to our database, and True otherwise.
600 """
601
602 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
603 return False
604
605 if not network.is_global:
2ba6ed07 606 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
607 return False
608
609 if network.version == 4:
610 if network.prefixlen < 7:
2ba6ed07 611 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
612 return False
613
614 if network.prefixlen > 24:
ebb087cf 615 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
616 return False
617
618 if str(network.network_address) == "0.0.0.0":
2ba6ed07 619 log.debug("Skipping network based on 0.0.0.0: %s" % network)
bd341642
PM
620 return False
621
622 elif network.version == 6:
623 if network.prefixlen < 10:
2ba6ed07 624 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
625 return False
626
627 if network.prefixlen > 48:
ebb087cf 628 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
629 return False
630
631 if str(network.network_address) == "::":
2ba6ed07 632 log.debug("Skipping network based on '::': %s" % network)
bd341642
PM
633 return False
634
635 else:
636 # This should not happen...
84187ab5 637 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
bd341642
PM
638 return False
639
640 # In case we have made it here, the network is considered to
641 # be suitable for libloc consumption...
642 return True
643
43fe570c
PM
644 def _check_parsed_asn(self, asn):
645 """
646 Assistive function to filter Autonomous System Numbers not being suitable
647 for adding to our database. Returns False in such cases, and True otherwise.
648 """
649
650 for start, end in VALID_ASN_RANGES:
651 if start <= asn and end >= asn:
652 return True
653
654 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
655 return False
656
28b08385 657 def _parse_block(self, block, source_key, validcountries = None):
6ffd06b5
MT
658 # Get first line to find out what type of block this is
659 line = block[0]
660
6ffd06b5 661 # aut-num
429a43d1 662 if line.startswith("aut-num:"):
28b08385 663 return self._parse_autnum_block(block, source_key)
6ffd06b5 664
aadac4c5
PM
665 # inetnum
666 if line.startswith("inet6num:") or line.startswith("inetnum:"):
28b08385 667 return self._parse_inetnum_block(block, source_key, validcountries)
aadac4c5 668
6ffd06b5
MT
669 # organisation
670 elif line.startswith("organisation:"):
28b08385 671 return self._parse_org_block(block, source_key)
6ffd06b5 672
28b08385 673 def _parse_autnum_block(self, block, source_key):
6ffd06b5
MT
674 autnum = {}
675 for line in block:
676 # Split line
677 key, val = split_line(line)
678
679 if key == "aut-num":
680 m = re.match(r"^(AS|as)(\d+)", val)
681 if m:
682 autnum["asn"] = m.group(2)
683
0365119d 684 elif key == "org":
e7d612e5 685 autnum[key] = val.upper()
6ffd06b5 686
426e0bee
PM
687 elif key == "descr":
688 # Save the first description line as well...
689 if not key in autnum:
690 autnum[key] = val
691
6ffd06b5 692 # Skip empty objects
426e0bee 693 if not autnum or not "asn" in autnum:
6ffd06b5
MT
694 return
695
426e0bee
PM
696 # Insert a dummy organisation handle into our temporary organisations
697 # table in case the AS does not have an organisation handle set, but
698 # has a description (a quirk often observed in APNIC area), so we can
699 # later display at least some string for this AS.
700 if not "org" in autnum:
701 if "descr" in autnum:
702 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
703
704 self.db.execute("INSERT INTO _organizations(handle, name, source) \
705 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
706 autnum.get("org"), autnum.get("descr"), source_key,
707 )
708 else:
709 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
710 autnum.get("asn"))
711 return
712
6ffd06b5 713 # Insert into database
28b08385
PM
714 self.db.execute("INSERT INTO _autnums(number, organization, source) \
715 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
0365119d 716 organization = excluded.organization",
28b08385 717 autnum.get("asn"), autnum.get("org"), source_key,
6ffd06b5
MT
718 )
719
28b08385 720 def _parse_inetnum_block(self, block, source_key, validcountries = None):
84187ab5 721 log.debug("Parsing inetnum block:")
aadac4c5
PM
722
723 inetnum = {}
724 for line in block:
84187ab5 725 log.debug(line)
aadac4c5
PM
726
727 # Split line
728 key, val = split_line(line)
729
84187ab5
PM
730 # Filter any inetnum records which are only referring to IP space
731 # not managed by that specific RIR...
732 if key == "netname":
5254e5fc 733 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
2ba6ed07 734 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
84187ab5
PM
735 return
736
aadac4c5
PM
737 if key == "inetnum":
738 start_address, delim, end_address = val.partition("-")
739
740 # Strip any excess space
741 start_address, end_address = start_address.rstrip(), end_address.strip()
742
845da577
PM
743 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
744 if start_address and not (delim or end_address):
745 try:
746 start_address = ipaddress.ip_network(start_address, strict=False)
747 except ValueError:
748 start_address = start_address.split("/")
38d0faea 749 ldigits = start_address[0].count(".")
845da577
PM
750
751 # How many octets do we need to add?
752 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
2ad36187 753 if ldigits == 1:
845da577 754 start_address = start_address[0] + ".0.0/" + start_address[1]
2ad36187 755 elif ldigits == 2:
845da577
PM
756 start_address = start_address[0] + ".0/" + start_address[1]
757 else:
758 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
759 return
760
761 try:
762 start_address = ipaddress.ip_network(start_address, strict=False)
763 except ValueError:
764 log.warning("Could not parse line in LACNIC DB format: %s" % line)
765 return
766
767 # Enumerate first and last IP address of this network
768 end_address = start_address[-1]
769 start_address = start_address[0]
770
771 else:
772 # Convert to IP address
773 try:
774 start_address = ipaddress.ip_address(start_address)
775 end_address = ipaddress.ip_address(end_address)
776 except ValueError:
777 log.warning("Could not parse line: %s" % line)
778 return
aadac4c5 779
1814283b 780 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
aadac4c5
PM
781
782 elif key == "inet6num":
1814283b 783 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
aadac4c5
PM
784
785 elif key == "country":
7434e5e0
PM
786 val = val.upper()
787
b6b2b331 788 # Catch RIR data objects with more than one country code...
7434e5e0 789 if not key in inetnum:
b6b2b331
PM
790 inetnum[key] = []
791 else:
7434e5e0 792 if val in inetnum.get("country"):
b6b2b331
PM
793 # ... but keep this list distinct...
794 continue
795
01e5f0ff
PM
796 # When people set country codes to "UK", they actually mean "GB"
797 if val == "UK":
798 val = "GB"
799
7434e5e0 800 inetnum[key].append(val)
aadac4c5
PM
801
802 # Skip empty objects
002deb6b 803 if not inetnum or not "country" in inetnum:
aadac4c5
PM
804 return
805
b6b2b331
PM
806 # Prepare skipping objects with unknown country codes...
807 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
808
1814283b
PM
809 # Iterate through all networks enumerated from above, check them for plausibility and insert
810 # them into the database, if _check_parsed_network() succeeded
811 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
812 if self._check_parsed_network(single_network):
7138b4ac 813
b6b2b331
PM
814 # Skip objects with unknown country codes if they are valid to avoid log spam...
815 if validcountries and invalidcountries:
816 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
817 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
48770ca8 818 break
7138b4ac
PM
819
820 # Everything is fine here, run INSERT statement...
b6b2b331
PM
821 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
822 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
823 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
1814283b 824 )
aadac4c5 825
28b08385 826 def _parse_org_block(self, block, source_key):
6ffd06b5
MT
827 org = {}
828 for line in block:
829 # Split line
830 key, val = split_line(line)
831
e7d612e5
PM
832 if key == "organisation":
833 org[key] = val.upper()
834 elif key == "org-name":
6ffd06b5
MT
835 org[key] = val
836
837 # Skip empty objects
838 if not org:
839 return
840
28b08385
PM
841 self.db.execute("INSERT INTO _organizations(handle, name, source) \
842 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
0365119d 843 UPDATE SET name = excluded.name",
28b08385 844 org.get("organisation"), org.get("org-name"), source_key,
6ffd06b5
MT
845 )
846
28b08385 847 def _parse_line(self, line, source_key, validcountries = None):
429a43d1
MT
848 # Skip version line
849 if line.startswith("2"):
850 return
6ffd06b5 851
429a43d1
MT
852 # Skip comments
853 if line.startswith("#"):
854 return
6ffd06b5 855
429a43d1
MT
856 try:
857 registry, country_code, type, line = line.split("|", 3)
858 except:
859 log.warning("Could not parse line: %s" % line)
860 return
6ffd06b5 861
84b175e2
PM
862 # Skip any lines that are for stats only or do not have a country
863 # code at all (avoids log spam below)
864 if not country_code or country_code == '*':
865 return
866
867 # Skip objects with unknown country codes
868 if validcountries and country_code not in validcountries:
869 log.warning("Skipping line with bogus country '%s': %s" % \
870 (country_code, line))
6ffd06b5
MT
871 return
872
429a43d1 873 if type in ("ipv6", "ipv4"):
28b08385 874 return self._parse_ip_line(country_code, type, line, source_key)
429a43d1 875
28b08385 876 def _parse_ip_line(self, country, type, line, source_key):
429a43d1
MT
877 try:
878 address, prefix, date, status, organization = line.split("|")
879 except ValueError:
880 organization = None
881
882 # Try parsing the line without organization
883 try:
884 address, prefix, date, status = line.split("|")
885 except ValueError:
886 log.warning("Unhandled line format: %s" % line)
887 return
888
889 # Skip anything that isn't properly assigned
890 if not status in ("assigned", "allocated"):
891 return
892
893 # Cast prefix into an integer
894 try:
895 prefix = int(prefix)
896 except:
897 log.warning("Invalid prefix: %s" % prefix)
7177031f 898 return
429a43d1
MT
899
900 # Fix prefix length for IPv4
901 if type == "ipv4":
902 prefix = 32 - int(math.log(prefix, 2))
903
904 # Try to parse the address
905 try:
906 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
907 except ValueError:
908 log.warning("Invalid IP address: %s" % address)
909 return
910
bd341642
PM
911 if not self._check_parsed_network(network):
912 return
913
b6b2b331
PM
914 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
915 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
87b3e102 916 UPDATE SET country = excluded.country",
b6b2b331 917 "%s" % network, country, [country], source_key,
6ffd06b5
MT
918 )
919
92403f39
PM
920 def _import_as_names_from_arin(self):
921 downloader = location.importer.Downloader()
922
923 # XXX: Download AS names file from ARIN (note that these names appear to be quite
924 # technical, not intended for human consumption, as description fields in
925 # organisation handles for other RIRs are - however, this is what we have got,
926 # and in some cases, it might be still better than nothing)
85e44d9a
MT
927 for line in downloader.request_lines("https://ftp.arin.net/info/asn.txt"):
928 # Valid lines start with a space, followed by the number of the Autonomous System ...
929 if not line.startswith(" "):
930 continue
92403f39 931
85e44d9a
MT
932 # Split line and check if there is a valid ASN in it...
933 asn, name = line.split()[0:2]
92403f39 934
85e44d9a
MT
935 try:
936 asn = int(asn)
937 except ValueError:
938 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
939 continue
92403f39 940
85e44d9a
MT
941 # Filter invalid ASNs...
942 if not self._check_parsed_asn(asn):
943 continue
92403f39 944
85e44d9a
MT
945 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
946 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
947 continue
92403f39 948
85e44d9a
MT
949 # Bail out in case the AS name contains anything we do not expect here...
950 if re.search(r"[^a-zA-Z0-9-_]", name):
951 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
952 (asn, name))
92403f39 953
85e44d9a
MT
954 # Things look good here, run INSERT statement and skip this one if we already have
955 # a (better?) name for this Autonomous System...
956 self.db.execute("""
957 INSERT INTO autnums(
958 number,
92403f39 959 name,
85e44d9a
MT
960 source
961 ) VALUES (%s, %s, %s)
962 ON CONFLICT (number) DO NOTHING""",
963 asn,
964 name,
965 "ARIN",
966 )
92403f39 967
83d61c46
MT
968 def handle_update_announcements(self, ns):
969 server = ns.server[0]
970
22d8d199
MT
971 with self.db.transaction():
972 if server.startswith("/"):
973 self._handle_update_announcements_from_bird(server)
974 else:
975 self._handle_update_announcements_from_telnet(server)
976
977 # Purge anything we never want here
978 self.db.execute("""
979 -- Delete default routes
980 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
981
982 -- Delete anything that is not global unicast address space
983 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
984
985 -- DELETE "current network" address space
986 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
987
988 -- DELETE local loopback address space
989 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
990
991 -- DELETE RFC 1918 address space
992 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
993 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
994 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
995
996 -- DELETE test, benchmark and documentation address space
997 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
998 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
999 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
1000 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
1001 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
1002
1003 -- DELETE CGNAT address space (RFC 6598)
1004 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
1005
1006 -- DELETE link local address space
1007 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
1008
b4d5b2a6 1009 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
22d8d199 1010 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
b4d5b2a6 1011 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
22d8d199
MT
1012
1013 -- DELETE multicast and reserved address space
1014 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
1015 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
1016
1017 -- Delete networks that are too small to be in the global routing table
1018 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
1019 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
1020
1021 -- Delete any non-public or reserved ASNs
1022 DELETE FROM announcements WHERE NOT (
1023 (autnum >= 1 AND autnum <= 23455)
1024 OR
1025 (autnum >= 23457 AND autnum <= 64495)
1026 OR
1027 (autnum >= 131072 AND autnum <= 4199999999)
1028 );
1029
1030 -- Delete everything that we have not seen for 14 days
1031 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
1032 """)
1033
1034 def _handle_update_announcements_from_bird(self, server):
1035 # Pre-compile the regular expression for faster searching
35e5f041 1036 route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
22d8d199
MT
1037
1038 log.info("Requesting routing table from Bird (%s)" % server)
1039
35e5f041
MT
1040 aggregated_networks = []
1041
22d8d199
MT
1042 # Send command to list all routes
1043 for line in self._bird_cmd(server, "show route"):
1044 m = route.match(line)
1045 if not m:
b767db8d
MT
1046 # Skip empty lines
1047 if not line:
1048 pass
1049
45fa97cd 1050 # Ignore any header lines with the name of the routing table
b767db8d
MT
1051 elif line.startswith(b"Table"):
1052 pass
45fa97cd
MT
1053
1054 # Log anything else
b767db8d
MT
1055 else:
1056 log.debug("Could not parse line: %s" % line.decode())
1057
22d8d199
MT
1058 continue
1059
1060 # Fetch the extracted network and ASN
1061 network, autnum = m.groups()
1062
35e5f041
MT
1063 # Decode into strings
1064 if network:
1065 network = network.decode()
1066 if autnum:
1067 autnum = autnum.decode()
1068
1069 # Collect all aggregated networks
1070 if not autnum:
1071 log.debug("%s is an aggregated network" % network)
1072 aggregated_networks.append(network)
1073 continue
1074
22d8d199
MT
1075 # Insert it into the database
1076 self.db.execute("INSERT INTO announcements(network, autnum) \
1077 VALUES(%s, %s) ON CONFLICT (network) DO \
1078 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
35e5f041 1079 network, autnum,
22d8d199
MT
1080 )
1081
35e5f041
MT
1082 # Process any aggregated networks
1083 for network in aggregated_networks:
1084 log.debug("Processing aggregated network %s" % network)
1085
1086 # Run "show route all" for each network
1087 for line in self._bird_cmd(server, "show route %s all" % network):
1088 # Try finding the path
1089 m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1090 if m:
1091 # Select the last AS number in the path
1092 autnum = m.group(1).decode()
1093
1094 # Insert it into the database
1095 self.db.execute("INSERT INTO announcements(network, autnum) \
1096 VALUES(%s, %s) ON CONFLICT (network) DO \
1097 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1098 network, autnum,
1099 )
1100
1101 # We don't need to process any more
1102 break
1103
22d8d199 1104 def _handle_update_announcements_from_telnet(self, server):
83d61c46 1105 # Pre-compile regular expression for routes
83d61c46
MT
1106 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
1107
1108 with telnetlib.Telnet(server) as t:
1109 # Enable debug mode
1110 #if ns.debug:
1111 # t.set_debuglevel(10)
1112
1113 # Wait for console greeting
fcd5b8b2
MT
1114 greeting = t.read_until(b"> ", timeout=30)
1115 if not greeting:
1116 log.error("Could not get a console prompt")
1117 return 1
83d61c46
MT
1118
1119 # Disable pagination
1120 t.write(b"terminal length 0\n")
1121
1122 # Wait for the prompt to return
1123 t.read_until(b"> ")
1124
1125 # Fetch the routing tables
22d8d199
MT
1126 for protocol in ("ipv6", "ipv4"):
1127 log.info("Requesting %s routing table" % protocol)
83d61c46 1128
22d8d199
MT
1129 # Request the full unicast routing table
1130 t.write(b"show bgp %s unicast\n" % protocol.encode())
83d61c46 1131
22d8d199
MT
1132 # Read entire header which ends with "Path"
1133 t.read_until(b"Path\r\n")
83d61c46 1134
22d8d199
MT
1135 while True:
1136 # Try reading a full entry
1137 # Those might be broken across multiple lines but ends with i
1138 line = t.read_until(b"i\r\n", timeout=5)
1139 if not line:
1140 break
83d61c46 1141
22d8d199
MT
1142 # Show line for debugging
1143 #log.debug(repr(line))
d773c1bc 1144
22d8d199
MT
1145 # Try finding a route in here
1146 m = route.match(line)
1147 if m:
1148 network, autnum = m.groups()
83d61c46 1149
22d8d199
MT
1150 # Convert network to string
1151 network = network.decode()
83d61c46 1152
22d8d199
MT
1153 # Append /24 for IPv4 addresses
1154 if not "/" in network and not ":" in network:
1155 network = "%s/24" % network
83d61c46 1156
22d8d199
MT
1157 # Convert AS number to integer
1158 autnum = int(autnum)
83d61c46 1159
22d8d199 1160 log.info("Found announcement for %s by %s" % (network, autnum))
83d61c46 1161
22d8d199
MT
1162 self.db.execute("INSERT INTO announcements(network, autnum) \
1163 VALUES(%s, %s) ON CONFLICT (network) DO \
1164 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1165 network, autnum,
1166 )
83d61c46 1167
22d8d199 1168 log.info("Finished reading the %s routing table" % protocol)
1d4e4e8f 1169
22d8d199
MT
1170 def _bird_cmd(self, socket_path, command):
1171 # Connect to the socket
1172 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1173 s.connect(socket_path)
cedee656 1174
22d8d199
MT
1175 # Allocate some buffer
1176 buffer = b""
83d61c46 1177
bbcefb38
MT
1178 log.debug("Sending Bird command: %s" % command)
1179
22d8d199
MT
1180 # Send the command
1181 s.send(b"%s\n" % command.encode())
209c04b6 1182
22d8d199
MT
1183 while True:
1184 # Fill up the buffer
1185 buffer += s.recv(4096)
209c04b6 1186
22d8d199
MT
1187 while True:
1188 # Search for the next newline
1189 pos = buffer.find(b"\n")
209c04b6 1190
22d8d199
MT
1191 # If we cannot find one, we go back and read more data
1192 if pos <= 0:
1193 break
209c04b6 1194
22d8d199
MT
1195 # Cut after the newline character
1196 pos += 1
b89cee80 1197
22d8d199
MT
1198 # Split the line we want and keep the rest in buffer
1199 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1200
bbcefb38
MT
1201 # Try parsing any status lines
1202 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1203 code, delim, line = int(line[:4]), line[4], line[5:]
1204
1205 log.debug("Received response code %s from bird" % code)
1206
1207 # End of output
1208 if code == 0:
1209 return
1210
1211 # Ignore hello line
1212 elif code == 1:
1213 continue
83d61c46 1214
22d8d199
MT
1215 # Otherwise return the line
1216 yield line
83d61c46 1217
d7fc3057
MT
1218 def handle_update_overrides(self, ns):
1219 with self.db.transaction():
68fdeb3b
PM
1220 # Only drop manually created overrides, as we can be reasonably sure to have them,
1221 # and preserve the rest. If appropriate, it is deleted by correspondent functions.
d7fc3057 1222 self.db.execute("""
68fdeb3b
PM
1223 DELETE FROM autnum_overrides WHERE source = 'manual';
1224 DELETE FROM network_overrides WHERE source = 'manual';
d7fc3057
MT
1225 """)
1226
dcef2ba4
PM
1227 # Update overrides for various cloud providers big enough to publish their own IP
1228 # network allocation lists in a machine-readable format...
1229 self._update_overrides_for_aws()
1230
69b3d894
PM
1231 # Update overrides for Spamhaus DROP feeds...
1232 self._update_overrides_for_spamhaus_drop()
1233
d7fc3057
MT
1234 for file in ns.files:
1235 log.info("Reading %s..." % file)
1236
1237 with open(file, "rb") as f:
1238 for type, block in location.importer.read_blocks(f):
1239 if type == "net":
1240 network = block.get("net")
1241 # Try to parse and normalise the network
1242 try:
1243 network = ipaddress.ip_network(network, strict=False)
1244 except ValueError as e:
1245 log.warning("Invalid IP network: %s: %s" % (network, e))
1246 continue
1247
94dfab8c
MT
1248 # Prevent that we overwrite all networks
1249 if network.prefixlen == 0:
1250 log.warning("Skipping %s: You cannot overwrite default" % network)
1251 continue
1252
d7fc3057
MT
1253 self.db.execute("""
1254 INSERT INTO network_overrides(
1255 network,
1256 country,
39ee3120 1257 source,
d7fc3057
MT
1258 is_anonymous_proxy,
1259 is_satellite_provider,
e17e804e
PM
1260 is_anycast,
1261 is_drop
39ee3120 1262 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
d7fc3057
MT
1263 ON CONFLICT (network) DO NOTHING""",
1264 "%s" % network,
1265 block.get("country"),
39ee3120 1266 "manual",
28d29b7c
MT
1267 self._parse_bool(block, "is-anonymous-proxy"),
1268 self._parse_bool(block, "is-satellite-provider"),
1269 self._parse_bool(block, "is-anycast"),
e17e804e 1270 self._parse_bool(block, "drop"),
d7fc3057
MT
1271 )
1272
f476cdfd
MT
1273 elif type == "aut-num":
1274 autnum = block.get("aut-num")
d7fc3057
MT
1275
1276 # Check if AS number begins with "AS"
1277 if not autnum.startswith("AS"):
1278 log.warning("Invalid AS number: %s" % autnum)
1279 continue
1280
1281 # Strip "AS"
1282 autnum = autnum[2:]
1283
1284 self.db.execute("""
1285 INSERT INTO autnum_overrides(
1286 number,
1287 name,
bd1aa6a1 1288 country,
39ee3120 1289 source,
d7fc3057
MT
1290 is_anonymous_proxy,
1291 is_satellite_provider,
e17e804e
PM
1292 is_anycast,
1293 is_drop
39ee3120 1294 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
d7fc3057 1295 ON CONFLICT DO NOTHING""",
bd1aa6a1
MT
1296 autnum,
1297 block.get("name"),
1298 block.get("country"),
39ee3120 1299 "manual",
28d29b7c
MT
1300 self._parse_bool(block, "is-anonymous-proxy"),
1301 self._parse_bool(block, "is-satellite-provider"),
1302 self._parse_bool(block, "is-anycast"),
e17e804e 1303 self._parse_bool(block, "drop"),
d7fc3057
MT
1304 )
1305
1306 else:
03d24a9b 1307 log.warning("Unsupported type: %s" % type)
d7fc3057 1308
dcef2ba4
PM
1309 def _update_overrides_for_aws(self):
1310 # Download Amazon AWS IP allocation file to create overrides...
1311 downloader = location.importer.Downloader()
1312
1313 try:
85e44d9a
MT
1314 # Fetch IP ranges
1315 f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
1316
1317 # Parse downloaded file
1318 aws_ip_dump = json.load(f)
dcef2ba4
PM
1319 except Exception as e:
1320 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1321 return
1322
68fdeb3b
PM
1323 # At this point, we can assume the downloaded file to be valid
1324 self.db.execute("""
1325 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1326 """)
1327
dcef2ba4
PM
1328 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1329 # there seems to be no machine-readable version available of this other than
1330 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1331 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1332 # was helpful here as well.
1333 aws_region_country_map = {
1334 "af-south-1": "ZA",
1335 "ap-east-1": "HK",
1336 "ap-south-1": "IN",
1337 "ap-south-2": "IN",
1338 "ap-northeast-3": "JP",
1339 "ap-northeast-2": "KR",
1340 "ap-southeast-1": "SG",
1341 "ap-southeast-2": "AU",
1342 "ap-southeast-3": "MY",
1343 "ap-southeast-4": "AU",
1344 "ap-northeast-1": "JP",
1345 "ca-central-1": "CA",
1346 "eu-central-1": "DE",
1347 "eu-central-2": "CH",
1348 "eu-west-1": "IE",
1349 "eu-west-2": "GB",
1350 "eu-south-1": "IT",
1351 "eu-south-2": "ES",
1352 "eu-west-3": "FR",
1353 "eu-north-1": "SE",
ad017947 1354 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
dcef2ba4
PM
1355 "me-central-1": "AE",
1356 "me-south-1": "BH",
1357 "sa-east-1": "BR"
1358 }
1359
1360 # Fetch all valid country codes to check parsed networks aganist...
1361 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1362 validcountries = []
1363
1364 for row in rows:
1365 validcountries.append(row.country_code)
1366
1367 with self.db.transaction():
1368 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1369 try:
1370 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1371 except ValueError:
1372 log.warning("Unable to parse line: %s" % snetwork)
1373 continue
1374
1375 # Sanitize parsed networks...
1376 if not self._check_parsed_network(network):
1377 continue
1378
1379 # Determine region of this network...
1380 region = snetwork["region"]
1381 cc = None
1382 is_anycast = False
1383
1384 # Any region name starting with "us-" will get "US" country code assigned straight away...
1385 if region.startswith("us-"):
1386 cc = "US"
1387 elif region.startswith("cn-"):
1388 # ... same goes for China ...
1389 cc = "CN"
1390 elif region == "GLOBAL":
1391 # ... funny region name for anycast-like networks ...
1392 is_anycast = True
1393 elif region in aws_region_country_map:
1394 # ... assign looked up country code otherwise ...
1395 cc = aws_region_country_map[region]
1396 else:
1397 # ... and bail out if we are missing something here
1398 log.warning("Unable to determine country code for line: %s" % snetwork)
1399 continue
1400
1401 # Skip networks with unknown country codes
1402 if not is_anycast and validcountries and cc not in validcountries:
1403 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1404 (cc, network))
1405 return
1406
1407 # Conduct SQL statement...
1408 self.db.execute("""
1409 INSERT INTO network_overrides(
1410 network,
1411 country,
1412 source,
1413 is_anonymous_proxy,
1414 is_satellite_provider,
1415 is_anycast
1416 ) VALUES (%s, %s, %s, %s, %s, %s)
1417 ON CONFLICT (network) DO NOTHING""",
1418 "%s" % network,
1419 cc,
1420 "Amazon AWS IP feed",
1421 None,
1422 None,
1423 is_anycast,
1424 )
1425
1426
69b3d894
PM
1427 def _update_overrides_for_spamhaus_drop(self):
1428 downloader = location.importer.Downloader()
1429
1430 ip_urls = [
1431 "https://www.spamhaus.org/drop/drop.txt",
1432 "https://www.spamhaus.org/drop/edrop.txt",
1433 "https://www.spamhaus.org/drop/dropv6.txt"
1434 ]
1435
1436 asn_urls = [
1437 "https://www.spamhaus.org/drop/asndrop.txt"
1438 ]
1439
1440 for url in ip_urls:
85e44d9a
MT
1441 # Fetch IP list
1442 f = downloader.retrieve(url)
1443
1444 # Split into lines
1445 fcontent = f.readlines()
69b3d894 1446
aa23e03d
MT
1447 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1448 # downloads.
1449 if len(fcontent) > 10:
1450 self.db.execute("""
1451 DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
1452 DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
1453 """)
1454 else:
1455 log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
1456 continue
68fdeb3b 1457
69b3d894
PM
1458 # Iterate through every line, filter comments and add remaining networks to
1459 # the override table in case they are valid...
1460 with self.db.transaction():
1461 for sline in fcontent:
69b3d894
PM
1462 # The response is assumed to be encoded in UTF-8...
1463 sline = sline.decode("utf-8")
1464
1465 # Comments start with a semicolon...
1466 if sline.startswith(";"):
1467 continue
1468
1469 # Extract network and ignore anything afterwards...
1470 try:
1471 network = ipaddress.ip_network(sline.split()[0], strict=False)
1472 except ValueError:
1473 log.error("Unable to parse line: %s" % sline)
1474 continue
1475
1476 # Sanitize parsed networks...
1477 if not self._check_parsed_network(network):
1478 log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1479 (url, network))
1480 continue
1481
1482 # Conduct SQL statement...
1483 self.db.execute("""
1484 INSERT INTO network_overrides(
1485 network,
1486 source,
1487 is_drop
1488 ) VALUES (%s, %s, %s)
148f68bf 1489 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
69b3d894
PM
1490 "%s" % network,
1491 "Spamhaus DROP lists",
1492 True
1493 )
1494
1495 for url in asn_urls:
85e44d9a
MT
1496 # Fetch URL
1497 f = downloader.retrieve(url)
69b3d894
PM
1498
1499 # Iterate through every line, filter comments and add remaining ASNs to
1500 # the override table in case they are valid...
1501 with self.db.transaction():
85e44d9a 1502 for sline in t.readlines():
69b3d894
PM
1503 # The response is assumed to be encoded in UTF-8...
1504 sline = sline.decode("utf-8")
1505
1506 # Comments start with a semicolon...
1507 if sline.startswith(";"):
1508 continue
1509
1510 # Throw away anything after the first space...
1511 sline = sline.split()[0]
1512
1513 # ... strip the "AS" prefix from it ...
1514 sline = sline.strip("AS")
1515
1516 # ... and convert it into an integer. Voila.
1517 asn = int(sline)
1518
1519 # Filter invalid ASNs...
1520 if not self._check_parsed_asn(asn):
1521 log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1522 (url, asn))
1523 continue
1524
1525 # Conduct SQL statement...
1526 self.db.execute("""
1527 INSERT INTO autnum_overrides(
1528 number,
1529 source,
1530 is_drop
1531 ) VALUES (%s, %s, %s)
148f68bf 1532 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
69b3d894
PM
1533 "%s" % asn,
1534 "Spamhaus ASN-DROP list",
1535 True
1536 )
1537
28d29b7c
MT
1538 @staticmethod
1539 def _parse_bool(block, key):
1540 val = block.get(key)
1541
1542 # There is no point to proceed when we got None
1543 if val is None:
1544 return
1545
1546 # Convert to lowercase
1547 val = val.lower()
1548
1549 # True
1550 if val in ("yes", "1"):
1551 return True
1552
1553 # False
1554 if val in ("no", "0"):
1555 return False
1556
1557 # Default to None
1558 return None
1559
3ce28dea
MT
1560 @property
1561 def countries(self):
1562 # Fetch all valid country codes to check parsed networks aganist
1563 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1564
1565 # Return all countries
1566 return [row.country_code for row in rows]
1567
8084b33a
MT
1568 def handle_import_countries(self, ns):
1569 with self.db.transaction():
1570 # Drop all data that we have
1571 self.db.execute("TRUNCATE TABLE countries")
1572
1573 for file in ns.file:
1574 for line in file:
1575 line = line.rstrip()
1576
1577 # Ignore any comments
1578 if line.startswith("#"):
1579 continue
1580
1581 try:
1582 country_code, continent_code, name = line.split(maxsplit=2)
1583 except:
1584 log.warning("Could not parse line: %s" % line)
1585 continue
1586
1587 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1588 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1589
6ffd06b5
MT
1590
1591def split_line(line):
1592 key, colon, val = line.partition(":")
1593
1594 # Strip any excess space
1595 key = key.strip()
1596 val = val.strip()
78ff0cf2 1597
6ffd06b5 1598 return key, val
78ff0cf2
MT
1599
1600def main():
1601 # Run the command line interface
1602 c = CLI()
1603 c.run()
1604
1605main()