]> git.ipfire.org Git - people/ms/libloc.git/blame - src/python/location-importer.in
location-importer.in: Do not make things more complicated than they are
[people/ms/libloc.git] / src / python / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
1814283b 6# Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
6ffd06b5 21import ipaddress
dcef2ba4 22import json
78ff0cf2 23import logging
6ffd06b5
MT
24import math
25import re
22d8d199 26import socket
78ff0cf2 27import sys
83d61c46 28import telnetlib
78ff0cf2
MT
29
30# Load our location module
31import location
29c6fa22 32import location.database
3192b66c 33import location.importer
78ff0cf2
MT
34from location.i18n import _
35
# Module-wide logger; propagate is switched on so that records are also
# passed to the handlers of ancestor loggers
log = logging.getLogger("location.importer")
log.propagate = 1

# Inclusive (first, last) pairs of Autonomous System Numbers which
# _check_parsed_asn() accepts; any ASN outside of them is rejected
VALID_ASN_RANGES = ((1, 23455), (23457, 64495), (131072, 4199999999))
46
47
78ff0cf2
MT
48class CLI(object):
def parse_cli(self):
    """
    Builds the argument parser, parses the command line and returns
    the resulting namespace.

    The log level is adjusted according to --debug/--quiet. If no
    sub-command has been selected, the usage is printed and the
    process exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description=_("Location Importer Command Line Interface"),
    )
    commands = parser.add_subparsers()

    # Flags which apply to every sub-command
    parser.add_argument("--debug", action="store_true",
        help=_("Enable debug output"))
    parser.add_argument("--quiet", action="store_true",
        help=_("Enable quiet mode"))
    parser.add_argument("--version", action="version",
        version="%(prog)s @VERSION@")

    # Database credentials (all of them are mandatory)
    parser.add_argument("--database-host", required=True,
        help=_("Database Hostname"), metavar=_("HOST"))
    parser.add_argument("--database-name", required=True,
        help=_("Database Name"), metavar=_("NAME"))
    parser.add_argument("--database-username", required=True,
        help=_("Database Username"), metavar=_("USERNAME"))
    parser.add_argument("--database-password", required=True,
        help=_("Database Password"), metavar=_("PASSWORD"))

    # write
    write_cmd = commands.add_parser("write", help=_("Write database to file"))
    write_cmd.add_argument("file", nargs=1, help=_("Database File"))
    write_cmd.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
    write_cmd.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
    write_cmd.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
    write_cmd.add_argument("--description", nargs="?", help=_("Sets a description"))
    write_cmd.add_argument("--license", nargs="?", help=_("Sets the license"))
    write_cmd.add_argument("--version", type=int, help=_("Database Format Version"))
    write_cmd.set_defaults(func=self.handle_write)

    # update-whois
    whois_cmd = commands.add_parser("update-whois", help=_("Update WHOIS Information"))
    whois_cmd.set_defaults(func=self.handle_update_whois)

    # update-announcements
    announcements_cmd = commands.add_parser("update-announcements",
        help=_("Update BGP Annoucements"))
    announcements_cmd.add_argument("server", nargs=1,
        help=_("Route Server to connect to"), metavar=_("SERVER"))
    announcements_cmd.set_defaults(func=self.handle_update_announcements)

    # update-overrides
    overrides_cmd = commands.add_parser("update-overrides",
        help=_("Update overrides"),
    )
    overrides_cmd.add_argument(
        "files", nargs="+", help=_("Files to import"),
    )
    overrides_cmd.set_defaults(func=self.handle_update_overrides)

    # import-countries
    countries_cmd = commands.add_parser("import-countries",
        help=_("Import countries"),
    )
    countries_cmd.add_argument("file", nargs=1, type=argparse.FileType("r"),
        help=_("File to import"))
    countries_cmd.set_defaults(func=self.handle_import_countries)

    args = parser.parse_args()

    # --debug takes precedence over --quiet
    if args.debug:
        location.logger.set_level(logging.DEBUG)
    elif args.quiet:
        location.logger.set_level(logging.WARNING)

    # Without a sub-command there is no handler to call, so show
    # the usage and give up
    if "func" not in args:
        parser.print_usage()
        sys.exit(2)

    return args
128
def run(self):
    """
    Entry point: parses the command line, connects to the database,
    runs the handler of the selected sub-command and terminates the
    process.

    The exit status is whatever the handler returned if that is
    truthy, and 0 otherwise.
    """
    args = self.parse_cli()

    # The handlers access the database through self.db
    self.db = self._setup_database(args)

    sys.exit(args.func(args) or 0)
145
29c6fa22
MT
def _setup_database(self, ns):
    """
    Opens the database connection described by the parsed command
    line arguments in ns, makes sure that all tables, columns and
    indexes this tool works with exist, and returns the connection.
    """
    # Every statement is written so that it can be run again against
    # an existing schema (IF NOT EXISTS)
    schema = """
        -- announcements
        CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
            first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
            last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
        CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
        CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
        CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

        -- autnums
        CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
        ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
        CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

        -- countries
        CREATE TABLE IF NOT EXISTS countries(
            country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
        CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

        -- networks
        CREATE TABLE IF NOT EXISTS networks(network inet, country text);
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
        CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
        CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
        CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

        -- overrides
        CREATE TABLE IF NOT EXISTS autnum_overrides(
            number bigint NOT NULL,
            name text,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
            ON autnum_overrides(number);
        ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
        ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;

        CREATE TABLE IF NOT EXISTS network_overrides(
            network inet NOT NULL,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
            ON network_overrides(network);
        CREATE INDEX IF NOT EXISTS network_overrides_search
            ON network_overrides USING GIST(network inet_ops);
        ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
        ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
    """

    connection = location.database.Connection(
        host=ns.database_host, database=ns.database_name,
        user=ns.database_username, password=ns.database_password,
    )

    # Apply the whole schema in a single transaction
    with connection.transaction():
        connection.execute(schema)

    return connection
214
0983f3dd
MT
def handle_write(self, ns):
    """
    Exports the content of the SQL database into a database file in
    libloc format.

    Autonomous Systems, networks (with country, ASN and flags after
    all overrides have been applied) and countries are handed to a
    location.Writer, which is then written to every path in ns.file.
    """
    writer = location.Writer(ns.signing_key, ns.backup_signing_key)

    # Only metadata that has been passed on the command line is set
    for field in ("vendor", "description", "license"):
        value = getattr(ns, field)
        if value:
            setattr(writer, field, value)

    # Autonomous Systems: an overridden name wins over the imported
    # one, and ASes with an empty name are left out
    log.info("Writing Autonomous Systems...")

    as_rows = self.db.query("""
        SELECT
            autnums.number AS number,
            COALESCE(
                (SELECT overrides.name FROM autnum_overrides overrides
                    WHERE overrides.number = autnums.number),
                autnums.name
            ) AS name
            FROM autnums
            WHERE name <> %s ORDER BY number
        """, "")

    for as_row in as_rows:
        autonomous_system = writer.add_as(as_row.number)
        autonomous_system.name = as_row.name

    # Networks
    log.info("Writing networks...")

    network_rows = self.db.query("""
        -- Return a list of those networks enriched with all
        -- other information that we store in the database
        SELECT
            DISTINCT ON (network)
            network,
            autnum,

            -- Country
            COALESCE(
                (
                    SELECT country FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT country FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                networks.country
            ) AS country,

            -- Flags
            COALESCE(
                (
                    SELECT is_anonymous_proxy FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anonymous_proxy FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anonymous_proxy,
            COALESCE(
                (
                    SELECT is_satellite_provider FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_satellite_provider FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_satellite_provider,
            COALESCE(
                (
                    SELECT is_anycast FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anycast FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anycast,
            COALESCE(
                (
                    SELECT is_drop FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_drop FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_drop
        FROM (
            SELECT
                known_networks.network AS network,
                announcements.autnum AS autnum,
                networks.country AS country,

                -- Must be part of returned values for ORDER BY clause
                masklen(announcements.network) AS sort_a,
                masklen(networks.network) AS sort_b
            FROM (
                    SELECT network FROM announcements
                UNION ALL
                    SELECT network FROM networks
                UNION ALL
                    SELECT network FROM network_overrides
                ) known_networks
            LEFT JOIN
                announcements ON known_networks.network <<= announcements.network
            LEFT JOIN
                networks ON known_networks.network <<= networks.network
            ORDER BY
                known_networks.network,
                sort_a DESC,
                sort_b DESC
        ) networks
    """)

    # Result column -> name of the flag constant in the location module
    # (the constant is only looked up when a row actually has the flag)
    flag_columns = (
        ("is_anonymous_proxy", "NETWORK_FLAG_ANONYMOUS_PROXY"),
        ("is_satellite_provider", "NETWORK_FLAG_SATELLITE_PROVIDER"),
        ("is_anycast", "NETWORK_FLAG_ANYCAST"),
        ("is_drop", "NETWORK_FLAG_DROP"),
    )

    for net_row in network_rows:
        network = writer.add_network(net_row.network)

        # Country and ASN are optional
        if net_row.country:
            network.country_code = net_row.country

        if net_row.autnum:
            network.asn = net_row.autnum

        for column, flag_name in flag_columns:
            if getattr(net_row, column):
                network.set_flag(getattr(location, flag_name))

    # Countries
    log.info("Writing countries...")

    for country_row in self.db.query("SELECT * FROM countries ORDER BY country_code"):
        country = writer.add_country(country_row.country_code)
        country.continent_code = country_row.continent_code
        country.name = country_row.name

    # Finally, write the result to all requested files
    log.info("Writing database to file...")
    for path in ns.file:
        writer.write(path)
396
6ffd06b5
MT
def handle_update_whois(self, ns):
    """
    Re-imports all network and Autonomous System information from the
    sources listed in location.importer.

    Within one transaction, the networks table is truncated, all
    WHOIS_SOURCES are parsed into temporary tables (dropped on
    commit) and the result is merged into the networks and autnums
    tables. After that, every URL in EXTENDED_SOURCES is imported in
    a transaction of its own, and finally the AS names published by
    ARIN are imported.

    ns is the parsed command line; it is not read by this handler.
    """
    downloader = location.importer.Downloader()

    # Download all sources
    with self.db.transaction():
        # Create some temporary tables to store parsed data.
        # _autnums.number is a bigint (like autnums.number), as a 4-byte
        # integer cannot hold the upper end of VALID_ASN_RANGES and a
        # single large ASN would otherwise abort the whole transaction.
        self.db.execute("""
            CREATE TEMPORARY TABLE _autnums(number bigint NOT NULL, organization text NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

            CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

            CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
            CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
        """)

        # Remove all previously imported content
        self.db.execute("""
            TRUNCATE TABLE networks;
        """)

        # Fetch all valid country codes to check parsed networks against.
        # They are only used for membership tests (once per parsed
        # object), hence a set.
        rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
        validcountries = {row.country_code for row in rows}

        for source_key in location.importer.WHOIS_SOURCES:
            for single_url in location.importer.WHOIS_SOURCES[source_key]:
                with downloader.request(single_url, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block, source_key, validcountries)

        # Process all parsed networks from every RIR we happen to have access to,
        # insert the largest network chunks into the networks table immediately...
        families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")

        for family in (row.family for row in families):
            smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)

            self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
                SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)

            # ... determine any other prefixes for this network family, ...
            prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
                WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)

            # ... and insert networks with this prefix in case they provide additional
            # information (i. e. subnet of a larger chunk with a different country)
            for prefix in (row.prefix for row in prefixes):
                self.db.execute("""
                    WITH candidates AS (
                        SELECT
                            _rirdata.network,
                            _rirdata.country,
                            _rirdata.original_countries,
                            _rirdata.source
                        FROM
                            _rirdata
                        WHERE
                            family(_rirdata.network) = %s
                        AND
                            masklen(_rirdata.network) = %s
                    ),
                    filtered AS (
                        SELECT
                            DISTINCT ON (c.network)
                            c.network,
                            c.country,
                            c.original_countries,
                            c.source,
                            masklen(networks.network),
                            networks.country AS parent_country
                        FROM
                            candidates c
                        LEFT JOIN
                            networks
                        ON
                            c.network << networks.network
                        ORDER BY
                            c.network,
                            masklen(networks.network) DESC NULLS LAST
                    )
                    INSERT INTO
                        networks(network, country, original_countries, source)
                    SELECT
                        network,
                        country,
                        original_countries,
                        source
                    FROM
                        filtered
                    WHERE
                        parent_country IS NULL
                    OR
                        country <> parent_country
                    ON CONFLICT DO NOTHING""",
                    family, prefix,
                )

        # Resolve the organisation handle of every parsed AS into a name
        self.db.execute("""
            INSERT INTO autnums(number, name, source)
                SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
                    JOIN _organizations ON _autnums.organization = _organizations.handle
            ON CONFLICT (number) DO UPDATE SET name = excluded.name;
        """)

    # Download all extended sources
    for source_key in location.importer.EXTENDED_SOURCES:
        for single_url in location.importer.EXTENDED_SOURCES[source_key]:
            with self.db.transaction():
                # Download data
                with downloader.request(single_url) as f:
                    for line in f:
                        self._parse_line(line, source_key, validcountries)

    # Download and import (technical) AS names from ARIN
    self._import_as_names_from_arin()
521
bd341642
PM
522 def _check_parsed_network(self, network):
523 """
524 Assistive function to detect and subsequently sort out parsed
525 networks from RIR data (both Whois and so-called "extended sources"),
526 which are or have...
527
528 (a) not globally routable (RFC 1918 space, et al.)
529 (b) covering a too large chunk of the IP address space (prefix length
530 is < 7 for IPv4 networks, and < 10 for IPv6)
531 (c) "0.0.0.0" or "::" as a network address
532 (d) are too small for being publicly announced (we have decided not to
533 process them at the moment, as they significantly enlarge our
534 database without providing very helpful additional information)
535
536 This unfortunately is necessary due to brain-dead clutter across
537 various RIR databases, causing mismatches and eventually disruptions.
538
539 We will return False in case a network is not suitable for adding
540 it to our database, and True otherwise.
541 """
542
543 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
544 return False
545
546 if not network.is_global:
2ba6ed07 547 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
548 return False
549
550 if network.version == 4:
551 if network.prefixlen < 7:
2ba6ed07 552 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
553 return False
554
555 if network.prefixlen > 24:
ebb087cf 556 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
557 return False
558
559 if str(network.network_address) == "0.0.0.0":
2ba6ed07 560 log.debug("Skipping network based on 0.0.0.0: %s" % network)
bd341642
PM
561 return False
562
563 elif network.version == 6:
564 if network.prefixlen < 10:
2ba6ed07 565 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
566 return False
567
568 if network.prefixlen > 48:
ebb087cf 569 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
570 return False
571
572 if str(network.network_address) == "::":
2ba6ed07 573 log.debug("Skipping network based on '::': %s" % network)
bd341642
PM
574 return False
575
576 else:
577 # This should not happen...
84187ab5 578 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
bd341642
PM
579 return False
580
581 # In case we have made it here, the network is considered to
582 # be suitable for libloc consumption...
583 return True
584
43fe570c
PM
585 def _check_parsed_asn(self, asn):
586 """
587 Assistive function to filter Autonomous System Numbers not being suitable
588 for adding to our database. Returns False in such cases, and True otherwise.
589 """
590
591 for start, end in VALID_ASN_RANGES:
592 if start <= asn and end >= asn:
593 return True
594
595 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
596 return False
597
28b08385 598 def _parse_block(self, block, source_key, validcountries = None):
6ffd06b5
MT
599 # Get first line to find out what type of block this is
600 line = block[0]
601
6ffd06b5 602 # aut-num
429a43d1 603 if line.startswith("aut-num:"):
28b08385 604 return self._parse_autnum_block(block, source_key)
6ffd06b5 605
aadac4c5
PM
606 # inetnum
607 if line.startswith("inet6num:") or line.startswith("inetnum:"):
28b08385 608 return self._parse_inetnum_block(block, source_key, validcountries)
aadac4c5 609
6ffd06b5
MT
610 # organisation
611 elif line.startswith("organisation:"):
28b08385 612 return self._parse_org_block(block, source_key)
6ffd06b5 613
def _parse_autnum_block(self, block, source_key):
    """
    Parses an aut-num block and stores the AS number together with
    its organisation handle in the temporary _autnums table.

    Blocks without a parsable AS number are ignored. If a block has
    no organisation handle but a description, a placeholder handle
    is made up and stored in _organizations with the first
    description line as its name. Blocks which have neither are
    logged and skipped.
    """
    fields = {}

    for raw_line in block:
        key, val = split_line(raw_line)

        if key == "aut-num":
            match = re.match(r"^(AS|as)(\d+)", val)
            if match:
                fields["asn"] = match.group(2)

        elif key == "org":
            fields[key] = val.upper()

        # Only the first description line is kept
        elif key == "descr" and key not in fields:
            fields[key] = val

    # Nothing to do without an AS number
    if "asn" not in fields:
        return

    if "org" not in fields:
        if "descr" not in fields:
            log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
                fields.get("asn"))
            return

        # No organisation handle, but a description (a quirk often
        # observed in APNIC area): insert a dummy organisation, so that
        # at least some string can be displayed for this AS later.
        fields["org"] = "LIBLOC-%s-ORGHANDLE" % fields.get("asn")

        self.db.execute("INSERT INTO _organizations(handle, name, source) \
            VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
            fields.get("org"), fields.get("descr"), source_key,
        )

    # Remember which organisation this AS belongs to
    self.db.execute("INSERT INTO _autnums(number, organization, source) \
        VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
            organization = excluded.organization",
        fields.get("asn"), fields.get("org"), source_key,
    )
660
def _parse_inetnum_block(self, block, source_key, validcountries = None):
    """
    Parses an inetnum/inet6num block and stores every suitable
    network of it in the temporary _rirdata table.

    block is a sequence of lines which are split into key and value
    by split_line(), source_key is stored along with each network,
    and validcountries is an optional collection of known country
    codes. If the latter is given (and not empty), objects carrying
    any other country code are logged and skipped.

    Nothing is stored if the block describes address space not
    managed by the RIR (judged by its netname), if the address range
    cannot be parsed, or if the block lacks either a network or a
    country. The first country code of a block becomes the country
    of its networks; all of them are stored as original_countries.
    """
    log.debug("Parsing inetnum block:")

    inetnum = {}
    for line in block:
        log.debug(line)

        # Split line
        key, val = split_line(line)

        # Filter any inetnum records which are only referring to IP space
        # not managed by that specific RIR...
        if key == "netname":
            if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
                log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
                return

        if key == "inetnum":
            start_address, delim, end_address = val.partition("-")

            # Strip any excess space
            start_address, end_address = start_address.rstrip(), end_address.strip()

            # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
            if start_address and not (delim or end_address):
                try:
                    network = ipaddress.ip_network(start_address, strict=False)
                except ValueError:
                    address, slash, prefix = start_address.partition("/")

                    # How many octets do we need to add? Two dots mean
                    # three octets, i.e. one is missing, and so on.
                    # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
                    missing_octets = 3 - address.count(".")

                    if not (slash and prefix) or missing_octets not in (1, 2):
                        log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
                        return

                    try:
                        network = ipaddress.ip_network(
                            "%s%s/%s" % (address, ".0" * missing_octets, prefix), strict=False,
                        )
                    except ValueError:
                        log.warning("Could not parse line in LACNIC DB format: %s" % line)
                        return

                # Enumerate first and last IP address of this network
                start_address, end_address = network[0], network[-1]

            else:
                # Convert to IP address
                try:
                    start_address = ipaddress.ip_address(start_address)
                    end_address = ipaddress.ip_address(end_address)
                except ValueError:
                    log.warning("Could not parse line: %s" % line)
                    return

            # summarize_address_range() refuses ranges which end before
            # they start or mix address families
            try:
                inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
            except (TypeError, ValueError):
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "inet6num":
            inetnum[key] = [ipaddress.ip_network(val, strict=False)]

        elif key == "country":
            val = val.upper()

            # When people set country codes to "UK", they actually mean "GB"
            # (translated before looking for duplicates, so that "GB" and
            # "UK" in one object do not result in "GB" being listed twice)
            if val == "UK":
                val = "GB"

            # Catch RIR data objects with more than one country code,
            # but keep this list distinct...
            countries = inetnum.setdefault(key, [])
            if val not in countries:
                countries.append(val)

    # Skip objects without any network or without any country
    networks = inetnum.get("inet6num") or inetnum.get("inetnum")
    countries = inetnum.get("country")

    if not networks or not countries:
        return

    # Prepare skipping objects with unknown country codes (only possible
    # if we have been given any valid ones to compare against)...
    if validcountries:
        invalidcountries = [country for country in countries if country not in validcountries]
    else:
        invalidcountries = []

    # Iterate through all networks enumerated from above, check them for plausibility and insert
    # them into the database, if _check_parsed_network() succeeded
    for single_network in networks:
        if not self._check_parsed_network(single_network):
            continue

        # Skip objects with unknown country codes; this is only reported for
        # objects with at least one suitable network to avoid log spam...
        if invalidcountries:
            log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
                (invalidcountries, countries, networks))
            break

        # Everything is fine here, run INSERT statement...
        self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
            VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
            "%s" % single_network, countries[0], countries, source_key,
        )
aadac4c5 766
def _parse_org_block(self, block, source_key):
    """
    Parses an organisation block and stores the (upper-cased) handle
    of the organisation together with its name in the temporary
    _organizations table. The name of a handle which is already
    known gets replaced.
    """
    org = {}

    for key, val in map(split_line, block):
        if key == "org-name":
            org[key] = val
        elif key == "organisation":
            # Handles are stored in upper case
            org[key] = val.upper()

    # Nothing of interest has been found in this block
    if len(org) == 0:
        return

    handle, name = org.get("organisation"), org.get("org-name")

    self.db.execute("INSERT INTO _organizations(handle, name, source) \
        VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
        UPDATE SET name = excluded.name",
        handle, name, source_key,
    )
787
28b08385 788 def _parse_line(self, line, source_key, validcountries = None):
429a43d1
MT
789 # Skip version line
790 if line.startswith("2"):
791 return
6ffd06b5 792
429a43d1
MT
793 # Skip comments
794 if line.startswith("#"):
795 return
6ffd06b5 796
429a43d1
MT
797 try:
798 registry, country_code, type, line = line.split("|", 3)
799 except:
800 log.warning("Could not parse line: %s" % line)
801 return
6ffd06b5 802
84b175e2
PM
803 # Skip any lines that are for stats only or do not have a country
804 # code at all (avoids log spam below)
805 if not country_code or country_code == '*':
806 return
807
808 # Skip objects with unknown country codes
809 if validcountries and country_code not in validcountries:
810 log.warning("Skipping line with bogus country '%s': %s" % \
811 (country_code, line))
6ffd06b5
MT
812 return
813
429a43d1 814 if type in ("ipv6", "ipv4"):
28b08385 815 return self._parse_ip_line(country_code, type, line, source_key)
429a43d1 816
28b08385 817 def _parse_ip_line(self, country, type, line, source_key):
429a43d1
MT
818 try:
819 address, prefix, date, status, organization = line.split("|")
820 except ValueError:
821 organization = None
822
823 # Try parsing the line without organization
824 try:
825 address, prefix, date, status = line.split("|")
826 except ValueError:
827 log.warning("Unhandled line format: %s" % line)
828 return
829
830 # Skip anything that isn't properly assigned
831 if not status in ("assigned", "allocated"):
832 return
833
834 # Cast prefix into an integer
835 try:
836 prefix = int(prefix)
837 except:
838 log.warning("Invalid prefix: %s" % prefix)
7177031f 839 return
429a43d1
MT
840
841 # Fix prefix length for IPv4
842 if type == "ipv4":
843 prefix = 32 - int(math.log(prefix, 2))
844
845 # Try to parse the address
846 try:
847 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
848 except ValueError:
849 log.warning("Invalid IP address: %s" % address)
850 return
851
bd341642
PM
852 if not self._check_parsed_network(network):
853 return
854
b6b2b331
PM
855 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
856 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
87b3e102 857 UPDATE SET country = excluded.country",
b6b2b331 858 "%s" % network, country, [country], source_key,
6ffd06b5
MT
859 )
860
92403f39
PM
def _import_as_names_from_arin(self):
    """
    Downloads the list of AS names published by ARIN and stores them
    in the autnums table (with "ARIN" as their source), unless a name
    for the same AS number is there already.

    Skipped are lines which do not start with a space or have less
    than two fields, ASNs which are not numeric or are rejected by
    _check_parsed_asn(), names which look like placeholders for
    another RIR or entity, and names which contain anything else
    than letters, digits, "-" and "_".
    """
    downloader = location.importer.Downloader()

    # Names which appear to be a placeholder for a different RIR or entity
    placeholder = re.compile(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)")

    # Anything but letters, digits, "_" and "-" (the dash comes last,
    # as it would form a range of characters anywhere else)
    unexpected = re.compile(r"[^a-zA-Z0-9_-]")

    # XXX: Download AS names file from ARIN (note that these names appear to be quite
    # technical, not intended for human consumption, as description fields in
    # organisation handles for other RIRs are - however, this is what we have got,
    # and in some cases, it might be still better than nothing)
    with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
        for line in f:
            # Convert binary line to string...
            # NOTE(review): it cannot be told from here whether the downloader
            # yields bytes or str; str() on bytes would result in "b'...'",
            # so bytes are decoded and anything else is left to str()
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="replace")
            else:
                line = str(line)

            # ... valid lines start with a space, followed by the number of the Autonomous System ...
            if not line.startswith(" "):
                continue

            # Split line and check if there is a valid ASN in it...
            fields = line.split()
            if len(fields) < 2:
                continue

            asn, name = fields[:2]

            try:
                asn = int(asn)
            except ValueError:
                log.debug("Skipping ARIN AS names line not containing an integer for ASN")
                continue

            # Filter invalid ASNs...
            if not self._check_parsed_asn(asn):
                continue

            # Skip any AS name that appears to be a placeholder for a different RIR or entity...
            if placeholder.match(name):
                continue

            # Bail out in case the AS name contains anything we do not expect here...
            if unexpected.search(name):
                log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
                    (asn, name))
                continue

            # Things look good here, run INSERT statement and skip this one if we already have
            # a (better?) name for this Autonomous System...
            self.db.execute("""
                INSERT INTO autnums(
                    number,
                    name,
                    source
                ) VALUES (%s, %s, %s)
                ON CONFLICT (number) DO NOTHING""",
                asn,
                name,
                "ARIN",
            )
912
83d61c46
MT
def handle_update_announcements(self, ns):
    """
    Refresh the "announcements" table from a routing daemon.

    The first entry of ns.server selects the source: a value starting with
    "/" is passed to the Bird importer, anything else to the telnet importer.
    Afterwards, and inside the same database transaction, every announcement
    that must never show up (default routes, non-global or reserved address
    space, overly specific prefixes, non-public ASNs and stale entries) is
    deleted again.
    """
    target = ns.server[0]

    with self.db.transaction():
        # Pick the importer that matches the kind of server we were given
        fetch = (
            self._handle_update_announcements_from_bird
            if target.startswith("/")
            else self._handle_update_announcements_from_telnet
        )
        fetch(target)

        # Purge anything we never want here
        self.db.execute("""
            -- Delete default routes
            DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

            -- Delete anything that is not global unicast address space
            DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

            -- DELETE "current network" address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

            -- DELETE local loopback address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

            -- DELETE RFC 1918 address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

            -- DELETE test, benchmark and documentation address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

            -- DELETE CGNAT address space (RFC 6598)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

            -- DELETE link local address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

            -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
            DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

            -- DELETE multicast and reserved address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

            -- Delete networks that are too small to be in the global routing table
            DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
            DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

            -- Delete any non-public or reserved ASNs
            DELETE FROM announcements WHERE NOT (
                (autnum >= 1 AND autnum <= 23455)
                OR
                (autnum >= 23457 AND autnum <= 64495)
                OR
                (autnum >= 131072 AND autnum <= 4199999999)
            );

            -- Delete everything that we have not seen for 14 days
            DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
        """)
978
def _handle_update_announcements_from_bird(self, server):
    """
    Import all routes reported by Bird's "show route" into "announcements".

    server is handed to _bird_cmd() as the path of the control socket. Every
    reply line matching the route pattern is upserted: on conflict the stored
    autnum is replaced and last_seen_at is set to the current timestamp.
    Lines that do not match are logged at debug level and skipped.
    """
    # Pre-compile the regular expression for faster searching.
    # This is a raw bytes literal so that "\s", "\[" and "\]" reach the regex
    # engine as written instead of being (invalid) string escape sequences.
    # Group 1 is the network, group 2 is whatever follows "[AS" up to, but
    # excluding, the single character in front of the closing bracket.
    route = re.compile(rb"^\s(.+?)\s+.+?\[AS(.*?).\]$")

    log.info("Requesting routing table from Bird (%s)" % server)

    # Send command to list all routes
    for line in self._bird_cmd(server, "show route"):
        m = route.match(line)
        if not m:
            # Never fail on undecodable bytes just for a debug message
            log.debug("Could not parse line: %s" % line.decode(errors="replace"))
            continue

        # Fetch the extracted network and ASN
        network, autnum = m.groups()

        # Insert it into the database
        self.db.execute("INSERT INTO announcements(network, autnum) \
            VALUES(%s, %s) ON CONFLICT (network) DO \
            UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
            network.decode(), autnum.decode(),
        )
1001
def _handle_update_announcements_from_telnet(self, server):
    """
    Import routes from a route server that is reachable via telnet.

    After the console prompt ("> ") has shown up, pagination is disabled and
    "show bgp ipv6 unicast" / "show bgp ipv4 unicast" are requested one after
    the other. Every entry that matches the route pattern is upserted into
    "announcements".

    Returns 1 if no console prompt was received within 30 seconds, None
    otherwise.

    NOTE: telnetlib is deprecated since Python 3.11 and was removed in
    Python 3.13 (PEP 594).
    """
    # Pre-compile regular expression for routes.
    # Raw bytes literal, so that "\*", "\s", "\>" and "\d" are regex escapes
    # rather than invalid string escape sequences. An entry is "*", one
    # whitespace or ">", "i", the network (group 1), and finally digits
    # (group 2) followed by one whitespace, "i" and CR LF.
    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

    with telnetlib.Telnet(server) as t:
        # Enable debug mode
        #if ns.debug:
        #    t.set_debuglevel(10)

        # Wait for console greeting
        greeting = t.read_until(b"> ", timeout=30)
        if not greeting:
            log.error("Could not get a console prompt")
            return 1

        # Disable pagination
        t.write(b"terminal length 0\n")

        # Wait for the prompt to return
        t.read_until(b"> ")

        # Fetch the routing tables
        for protocol in ("ipv6", "ipv4"):
            log.info("Requesting %s routing table" % protocol)

            # Request the full unicast routing table
            t.write(b"show bgp %s unicast\n" % protocol.encode())

            # Read entire header which ends with "Path"
            t.read_until(b"Path\r\n")

            while True:
                # Try reading a full entry
                # Those might be broken across multiple lines but ends with i
                line = t.read_until(b"i\r\n", timeout=5)

                # Nothing arrived within the timeout, so the table is complete
                if not line:
                    break

                # Show line for debugging
                #log.debug(repr(line))

                # Try finding a route in here
                m = route.match(line)
                if m:
                    network, autnum = m.groups()

                    # Convert network to string
                    network = network.decode()

                    # Append /24 for IPv4 addresses
                    if not "/" in network and not ":" in network:
                        network = "%s/24" % network

                    # Convert AS number to integer
                    autnum = int(autnum)

                    log.info("Found announcement for %s by %s" % (network, autnum))

                    self.db.execute("INSERT INTO announcements(network, autnum) \
                        VALUES(%s, %s) ON CONFLICT (network) DO \
                        UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                        network, autnum,
                    )

            log.info("Finished reading the %s routing table" % protocol)
1d4e4e8f 1067
22d8d199
MT
1068 def _bird_cmd(self, socket_path, command):
1069 # Connect to the socket
1070 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1071 s.connect(socket_path)
cedee656 1072
22d8d199
MT
1073 # Allocate some buffer
1074 buffer = b""
83d61c46 1075
22d8d199
MT
1076 # Send the command
1077 s.send(b"%s\n" % command.encode())
209c04b6 1078
22d8d199
MT
1079 while True:
1080 # Fill up the buffer
1081 buffer += s.recv(4096)
209c04b6 1082
22d8d199
MT
1083 while True:
1084 # Search for the next newline
1085 pos = buffer.find(b"\n")
209c04b6 1086
22d8d199
MT
1087 # If we cannot find one, we go back and read more data
1088 if pos <= 0:
1089 break
209c04b6 1090
22d8d199
MT
1091 # Cut after the newline character
1092 pos += 1
b89cee80 1093
22d8d199
MT
1094 # Split the line we want and keep the rest in buffer
1095 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1096
22d8d199
MT
1097 # Look for the end-of-output indicator
1098 if line == b"0000 \n":
1099 return
83d61c46 1100
22d8d199
MT
1101 # Otherwise return the line
1102 yield line
83d61c46 1103
d7fc3057
MT
def handle_update_overrides(self, ns):
    """
    Rebuild the "autnum_overrides" and "network_overrides" tables.

    Everything happens in one transaction: both tables are truncated, the
    automatic overrides (Amazon AWS, Spamhaus DROP) are loaded, and finally
    every file listed in ns.files is read block by block. "net" blocks end
    up in network_overrides, "aut-num" blocks in autnum_overrides, both with
    the source "manual"; rows that already exist are not overwritten. Any
    other block type is reported with a warning.
    """
    # Block keys holding the four flags, in the order of the SQL columns
    flag_keys = ("is-anonymous-proxy", "is-satellite-provider", "is-anycast", "drop")

    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("""
            TRUNCATE TABLE autnum_overrides;
            TRUNCATE TABLE network_overrides;
        """)

        # Update overrides for various cloud providers big enough to publish their own IP
        # network allocation lists in a machine-readable format...
        self._update_overrides_for_aws()

        # Update overrides for Spamhaus DROP feeds...
        self._update_overrides_for_spamhaus_drop()

        for path in ns.files:
            log.info("Reading %s..." % path)

            with open(path, "rb") as handle:
                for kind, block in location.importer.read_blocks(handle):
                    # Anything that is neither a network nor an AS is not for us
                    if kind not in ("net", "aut-num"):
                        log.warning("Unsupported type: %s" % kind)
                        continue

                    if kind == "aut-num":
                        autnum = block.get("aut-num")

                        # Check if AS number begins with "AS"
                        if not autnum.startswith("AS"):
                            log.warning("Invalid AS number: %s" % autnum)
                            continue

                        flags = [self._parse_bool(block, key) for key in flag_keys]

                        self.db.execute("""
                            INSERT INTO autnum_overrides(
                                number,
                                name,
                                country,
                                source,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast,
                                is_drop
                            ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT DO NOTHING""",
                            autnum[2:],  # without the leading "AS"
                            block.get("name"),
                            block.get("country"),
                            "manual",
                            *flags
                        )
                        continue

                    # What is left is a "net" block
                    raw = block.get("net")

                    # Try to parse and normalise the network
                    try:
                        network = ipaddress.ip_network(raw, strict=False)
                    except ValueError as e:
                        log.warning("Invalid IP network: %s: %s" % (raw, e))
                        continue

                    # Prevent that we overwrite all networks
                    if network.prefixlen == 0:
                        log.warning("Skipping %s: You cannot overwrite default" % network)
                        continue

                    flags = [self._parse_bool(block, key) for key in flag_keys]

                    self.db.execute("""
                        INSERT INTO network_overrides(
                            network,
                            country,
                            source,
                            is_anonymous_proxy,
                            is_satellite_provider,
                            is_anycast,
                            is_drop
                        ) VALUES (%s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (network) DO NOTHING""",
                        "%s" % network,
                        block.get("country"),
                        "manual",
                        *flags
                    )
d7fc3057 1193
dcef2ba4
PM
def _update_overrides_for_aws(self):
    """
    Create network overrides from the IP ranges published by Amazon AWS.

    Downloads https://ip-ranges.amazonaws.com/ip-ranges.json and inserts one
    row into "network_overrides" for each IPv4 and IPv6 prefix, with the
    country derived from the AWS region name. Prefixes of the "GLOBAL"
    region are flagged as anycast and get no country. Prefixes that cannot
    be parsed, fail _check_parsed_network(), belong to an unknown region or
    map to a country that is not in the "countries" table are skipped.
    Existing rows are not overwritten. Download or JSON errors are logged
    and end the import.
    """
    # Download Amazon AWS IP allocation file to create overrides...
    downloader = location.importer.Downloader()

    try:
        with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
            aws_ip_dump = json.load(f.body)
    except Exception as e:
        log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
        return

    # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
    # there seems to be no machine-readable version available of this other than
    # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
    # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
    # was helpful here as well.
    aws_region_country_map = {
        "af-south-1": "ZA",
        "ap-east-1": "HK",
        "ap-south-1": "IN",
        "ap-south-2": "IN",
        "ap-northeast-3": "JP",
        "ap-northeast-2": "KR",
        "ap-southeast-1": "SG",
        "ap-southeast-2": "AU",
        "ap-southeast-3": "ID", # Asia Pacific (Jakarta)
        "ap-southeast-4": "AU",
        "ap-northeast-1": "JP",
        "ca-central-1": "CA",
        "eu-central-1": "DE",
        "eu-central-2": "CH",
        "eu-west-1": "IE",
        "eu-west-2": "GB",
        "eu-south-1": "IT",
        "eu-south-2": "ES",
        "eu-west-3": "FR",
        "eu-north-1": "SE",
        "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
        "me-central-1": "AE",
        "me-south-1": "BH",
        "sa-east-1": "BR",
    }

    # Fetch all valid country codes to check parsed networks against...
    rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
    validcountries = {row.country_code for row in rows}

    with self.db.transaction():
        for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
            try:
                network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
            except ValueError:
                log.warning("Unable to parse line: %s" % snetwork)
                continue

            # Sanitize parsed networks...
            if not self._check_parsed_network(network):
                continue

            # Determine region of this network...
            region = snetwork["region"]
            cc = None
            is_anycast = False

            # Any region name starting with "us-" will get "US" country code assigned straight away...
            if region.startswith("us-"):
                cc = "US"
            elif region.startswith("cn-"):
                # ... same goes for China ...
                cc = "CN"
            elif region == "GLOBAL":
                # ... funny region name for anycast-like networks ...
                is_anycast = True
            elif region in aws_region_country_map:
                # ... assign looked up country code otherwise ...
                cc = aws_region_country_map[region]
            else:
                # ... and bail out if we are missing something here
                log.warning("Unable to determine country code for line: %s" % snetwork)
                continue

            # Skip networks with unknown country codes. Only this one network
            # is skipped; the remaining prefixes still need to be processed.
            # (If the countries table is empty, this check is not performed.)
            if not is_anycast and validcountries and cc not in validcountries:
                log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
                    (cc, network))
                continue

            # Conduct SQL statement...
            self.db.execute("""
                INSERT INTO network_overrides(
                    network,
                    country,
                    source,
                    is_anonymous_proxy,
                    is_satellite_provider,
                    is_anycast
                ) VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (network) DO NOTHING""",
                "%s" % network,
                cc,
                "Amazon AWS IP feed",
                None,
                None,
                is_anycast,
            )
1302
1303
69b3d894
PM
def _update_overrides_for_spamhaus_drop(self):
    """
    Flag networks and Autonomous Systems listed by Spamhaus DROP as "drop".

    The IP lists (drop.txt, edrop.txt, dropv6.txt) are written to
    "network_overrides", the ASN list (asndrop.txt) to "autnum_overrides".
    Rows that already exist only get is_drop set to True. Comment lines
    (starting with ";"), blank lines and entries that cannot be parsed or
    that fail the sanity checks are skipped. If any download fails, the
    error is logged and the method returns without processing further lists.
    """
    downloader = location.importer.Downloader()

    ip_urls = [
        "https://www.spamhaus.org/drop/drop.txt",
        "https://www.spamhaus.org/drop/edrop.txt",
        "https://www.spamhaus.org/drop/dropv6.txt"
    ]

    asn_urls = [
        "https://www.spamhaus.org/drop/asndrop.txt"
    ]

    for url in ip_urls:
        try:
            with downloader.request(url, return_blocks=False) as f:
                fcontent = f.body.readlines()
        except Exception as e:
            log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
            return

        # Iterate through every line, filter comments and add remaining networks to
        # the override table in case they are valid...
        with self.db.transaction():
            for sline in fcontent:

                # The response is assumed to be encoded in UTF-8...
                sline = sline.decode("utf-8")

                # Comments start with a semicolon...
                if sline.startswith(";"):
                    continue

                # Skip blank lines instead of failing on the index below
                fields = sline.split()
                if not fields:
                    continue

                # Extract network and ignore anything afterwards...
                try:
                    network = ipaddress.ip_network(fields[0], strict=False)
                except ValueError:
                    log.error("Unable to parse line: %s" % sline)
                    continue

                # Sanitize parsed networks...
                if not self._check_parsed_network(network):
                    log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
                        (url, network))
                    continue

                # Conduct SQL statement...
                self.db.execute("""
                    INSERT INTO network_overrides(
                        network,
                        source,
                        is_drop
                    ) VALUES (%s, %s, %s)
                    ON CONFLICT (network) DO UPDATE SET is_drop = True""",
                    "%s" % network,
                    "Spamhaus DROP lists",
                    True
                )

    for url in asn_urls:
        try:
            with downloader.request(url, return_blocks=False) as f:
                fcontent = f.body.readlines()
        except Exception as e:
            log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
            return

        # Iterate through every line, filter comments and add remaining ASNs to
        # the override table in case they are valid...
        with self.db.transaction():
            for sline in fcontent:

                # The response is assumed to be encoded in UTF-8...
                sline = sline.decode("utf-8")

                # Comments start with a semicolon...
                if sline.startswith(";"):
                    continue

                # Skip blank lines instead of failing on the index below
                fields = sline.split()
                if not fields:
                    continue

                # Throw away anything after the first space...
                token = fields[0]

                # ... remove the "AS" prefix from it (str.strip("AS") would
                # remove any "A"/"S" characters from both ends instead) ...
                if token.startswith("AS"):
                    token = token[2:]

                # ... and convert it into an integer. Voila.
                try:
                    asn = int(token)
                except ValueError:
                    log.error("Unable to parse line: %s" % sline)
                    continue

                # Filter invalid ASNs...
                if not self._check_parsed_asn(asn):
                    log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
                        (url, asn))
                    continue

                # Conduct SQL statement...
                self.db.execute("""
                    INSERT INTO autnum_overrides(
                        number,
                        source,
                        is_drop
                    ) VALUES (%s, %s, %s)
                    ON CONFLICT (number) DO UPDATE SET is_drop = True""",
                    "%s" % asn,
                    "Spamhaus ASN-DROP list",
                    True
                )
1410
28d29b7c
MT
1411 @staticmethod
1412 def _parse_bool(block, key):
1413 val = block.get(key)
1414
1415 # There is no point to proceed when we got None
1416 if val is None:
1417 return
1418
1419 # Convert to lowercase
1420 val = val.lower()
1421
1422 # True
1423 if val in ("yes", "1"):
1424 return True
1425
1426 # False
1427 if val in ("no", "0"):
1428 return False
1429
1430 # Default to None
1431 return None
1432
8084b33a
MT
def handle_import_countries(self, ns):
    """
    Replace the content of the "countries" table with the given files.

    ns.file is an iterable of iterables of text lines. Every line is
    expected to read "<country code> <continent code> <name>", where the
    name may contain whitespace. Blank lines and lines starting with "#" are
    ignored; lines with fewer than three fields are reported with a warning
    and skipped. Truncating the table and all inserts happen in one
    transaction.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("TRUNCATE TABLE countries")

        for file in ns.file:
            for line in file:
                line = line.rstrip()

                # Ignore blank lines and any comments
                if not line or line.startswith("#"):
                    continue

                # Unpacking raises ValueError if there are fewer than three fields
                try:
                    country_code, continent_code, name = line.split(maxsplit=2)
                except ValueError:
                    log.warning("Could not parse line: %s" % line)
                    continue

                self.db.execute(
                    "INSERT INTO countries(country_code, name, continent_code) "
                    "VALUES(%s, %s, %s) ON CONFLICT DO NOTHING",
                    country_code, name, continent_code,
                )
1454
6ffd06b5
MT
1455
def split_line(line):
    """
    Split "key: value" at the first colon.

    Returns a (key, value) tuple with surrounding whitespace removed from
    both parts. Without a colon, the whole line becomes the key and the
    value is an empty string.
    """
    head, _, tail = line.partition(":")

    # Strip any excess space
    return head.strip(), tail.strip()
78ff0cf2
MT
1464
def main():
    """Create the command line interface and run it."""
    # Run the command line interface
    c = CLI()
    c.run()

# Only start the importer when this file is executed as a script, so that
# merely importing it has no side effects
if __name__ == "__main__":
    main()