]> git.ipfire.org Git - people/ms/libloc.git/blame - src/python/location-importer.in
importer: Improve performance of network export query
[people/ms/libloc.git] / src / python / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
2ad36187 6# Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
6ffd06b5 21import ipaddress
dcef2ba4 22import json
78ff0cf2 23import logging
6ffd06b5
MT
24import math
25import re
22d8d199 26import socket
78ff0cf2 27import sys
83d61c46 28import telnetlib
78ff0cf2
MT
29
30# Load our location module
31import location
29c6fa22 32import location.database
3192b66c 33import location.importer
78ff0cf2
MT
34from location.i18n import _
35
36# Initialise logging
37log = logging.getLogger("location.importer")
38log.propagate = 1
39
43fe570c
PM
40# Define constants
41VALID_ASN_RANGES = (
42 (1, 23455),
43 (23457, 64495),
44 (131072, 4199999999),
45)
46
47
78ff0cf2
MT
48class CLI(object):
49 def parse_cli(self):
50 parser = argparse.ArgumentParser(
51 description=_("Location Importer Command Line Interface"),
52 )
6ffd06b5 53 subparsers = parser.add_subparsers()
78ff0cf2
MT
54
55 # Global configuration flags
56 parser.add_argument("--debug", action="store_true",
57 help=_("Enable debug output"))
bc1f5f53
MT
58 parser.add_argument("--quiet", action="store_true",
59 help=_("Enable quiet mode"))
78ff0cf2
MT
60
61 # version
62 parser.add_argument("--version", action="version",
63 version="%(prog)s @VERSION@")
64
29c6fa22
MT
65 # Database
66 parser.add_argument("--database-host", required=True,
67 help=_("Database Hostname"), metavar=_("HOST"))
68 parser.add_argument("--database-name", required=True,
69 help=_("Database Name"), metavar=_("NAME"))
70 parser.add_argument("--database-username", required=True,
71 help=_("Database Username"), metavar=_("USERNAME"))
72 parser.add_argument("--database-password", required=True,
73 help=_("Database Password"), metavar=_("PASSWORD"))
74
0983f3dd
MT
75 # Write Database
76 write = subparsers.add_parser("write", help=_("Write database to file"))
77 write.set_defaults(func=self.handle_write)
78 write.add_argument("file", nargs=1, help=_("Database File"))
79 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
1164d876 80 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
0983f3dd
MT
81 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
82 write.add_argument("--description", nargs="?", help=_("Sets a description"))
83 write.add_argument("--license", nargs="?", help=_("Sets the license"))
b904896a 84 write.add_argument("--version", type=int, help=_("Database Format Version"))
0983f3dd 85
6ffd06b5
MT
86 # Update WHOIS
87 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois.set_defaults(func=self.handle_update_whois)
89
83d61c46
MT
90 # Update announcements
91 update_announcements = subparsers.add_parser("update-announcements",
92 help=_("Update BGP Annoucements"))
93 update_announcements.set_defaults(func=self.handle_update_announcements)
94 update_announcements.add_argument("server", nargs=1,
95 help=_("Route Server to connect to"), metavar=_("SERVER"))
96
d7fc3057
MT
97 # Update overrides
98 update_overrides = subparsers.add_parser("update-overrides",
99 help=_("Update overrides"),
100 )
101 update_overrides.add_argument(
102 "files", nargs="+", help=_("Files to import"),
103 )
104 update_overrides.set_defaults(func=self.handle_update_overrides)
105
8084b33a
MT
106 # Import countries
107 import_countries = subparsers.add_parser("import-countries",
108 help=_("Import countries"),
109 )
110 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
111 help=_("File to import"))
112 import_countries.set_defaults(func=self.handle_import_countries)
113
78ff0cf2
MT
114 args = parser.parse_args()
115
bc1f5f53 116 # Configure logging
78ff0cf2 117 if args.debug:
f9de5e61 118 location.logger.set_level(logging.DEBUG)
bc1f5f53
MT
119 elif args.quiet:
120 location.logger.set_level(logging.WARNING)
78ff0cf2 121
6ffd06b5
MT
122 # Print usage if no action was given
123 if not "func" in args:
124 parser.print_usage()
125 sys.exit(2)
126
78ff0cf2
MT
127 return args
128
129 def run(self):
130 # Parse command line arguments
131 args = self.parse_cli()
132
29c6fa22 133 # Initialise database
6ffd06b5 134 self.db = self._setup_database(args)
29c6fa22 135
78ff0cf2 136 # Call function
6ffd06b5 137 ret = args.func(args)
78ff0cf2
MT
138
139 # Return with exit code
140 if ret:
141 sys.exit(ret)
142
143 # Otherwise just exit
144 sys.exit(0)
145
29c6fa22
MT
146 def _setup_database(self, ns):
147 """
148 Initialise the database
149 """
150 # Connect to database
151 db = location.database.Connection(
152 host=ns.database_host, database=ns.database_name,
153 user=ns.database_username, password=ns.database_password,
154 )
155
156 with db.transaction():
157 db.execute("""
83d61c46
MT
158 -- announcements
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
a1707d89 164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
83d61c46 165
6ffd06b5 166 -- autnums
0983f3dd 167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
26f06e70 168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
6ffd06b5
MT
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
170
8084b33a
MT
171 -- countries
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
175
429a43d1 176 -- networks
83d61c46 177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
b6b2b331 178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
26f06e70 179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
429a43d1 180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
002deb6b 181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
83d61c46 182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
d7fc3057
MT
183
184 -- overrides
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
187 name text,
bd1aa6a1 188 country text,
b8e25b71
MT
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
191 is_anycast boolean
d7fc3057
MT
192 );
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
39ee3120 195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
e17e804e 196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
d7fc3057
MT
197
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
200 country text,
b8e25b71
MT
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
203 is_anycast boolean
d7fc3057
MT
204 );
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
991baf53
MT
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
39ee3120 209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
e17e804e 210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
29c6fa22
MT
211 """)
212
213 return db
214
0983f3dd
MT
215 def handle_write(self, ns):
216 """
217 Compiles a database in libloc format out of what is in the database
218 """
0983f3dd 219 # Allocate a writer
1164d876 220 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
0983f3dd
MT
221
222 # Set all metadata
223 if ns.vendor:
224 writer.vendor = ns.vendor
225
226 if ns.description:
227 writer.description = ns.description
228
229 if ns.license:
230 writer.license = ns.license
231
232 # Add all Autonomous Systems
233 log.info("Writing Autonomous Systems...")
234
235 # Select all ASes with a name
6e97c44b
MT
236 rows = self.db.query("""
237 SELECT
238 autnums.number AS number,
239 COALESCE(
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
242 autnums.name
243 ) AS name
244 FROM autnums
245 WHERE name <> %s ORDER BY number
246 """, "")
0983f3dd
MT
247
248 for row in rows:
249 a = writer.add_as(row.number)
250 a.name = row.name
251
252 # Add all networks
253 log.info("Writing networks...")
254
255 # Select all known networks
256 rows = self.db.query("""
295a7774
MT
257 WITH known_networks AS (
258 SELECT network FROM announcements
259 UNION
260 SELECT network FROM networks
261 UNION
262 SELECT network FROM network_overrides
263 ),
264
265 ordered_networks AS (
266 SELECT
267 known_networks.network AS network,
268 announcements.autnum AS autnum,
269 networks.country AS country,
270
271 -- Must be part of returned values for ORDER BY clause
272 masklen(announcements.network) AS sort_a,
273 masklen(networks.network) AS sort_b
274 FROM
275 known_networks
276 LEFT JOIN
277 announcements ON known_networks.network <<= announcements.network
278 LEFT JOIN
279 networks ON known_networks.network <<= networks.network
280 ORDER BY
281 known_networks.network,
282 sort_a DESC,
283 sort_b DESC
284 )
285
5372d9c7
MT
286 -- Return a list of those networks enriched with all
287 -- other information that we store in the database
0983f3dd 288 SELECT
bbea93a7
MT
289 DISTINCT ON (network)
290 network,
291 autnum,
bd1aa6a1
MT
292
293 -- Country
294 COALESCE(
295 (
296 SELECT country FROM network_overrides overrides
bbea93a7 297 WHERE networks.network <<= overrides.network
bd1aa6a1
MT
298 ORDER BY masklen(overrides.network) DESC
299 LIMIT 1
300 ),
301 (
302 SELECT country FROM autnum_overrides overrides
bbea93a7 303 WHERE networks.autnum = overrides.number
bd1aa6a1
MT
304 ),
305 networks.country
306 ) AS country,
8e8555bb 307
0983f3dd 308 -- Flags
1422b5d4
MT
309 COALESCE(
310 (
311 SELECT is_anonymous_proxy FROM network_overrides overrides
bbea93a7 312 WHERE networks.network <<= overrides.network
1422b5d4
MT
313 ORDER BY masklen(overrides.network) DESC
314 LIMIT 1
315 ),
316 (
317 SELECT is_anonymous_proxy FROM autnum_overrides overrides
bbea93a7 318 WHERE networks.autnum = overrides.number
b8e25b71
MT
319 ),
320 FALSE
1422b5d4
MT
321 ) AS is_anonymous_proxy,
322 COALESCE(
323 (
324 SELECT is_satellite_provider FROM network_overrides overrides
bbea93a7 325 WHERE networks.network <<= overrides.network
1422b5d4
MT
326 ORDER BY masklen(overrides.network) DESC
327 LIMIT 1
328 ),
329 (
330 SELECT is_satellite_provider FROM autnum_overrides overrides
bbea93a7 331 WHERE networks.autnum = overrides.number
b8e25b71
MT
332 ),
333 FALSE
1422b5d4
MT
334 ) AS is_satellite_provider,
335 COALESCE(
336 (
337 SELECT is_anycast FROM network_overrides overrides
bbea93a7 338 WHERE networks.network <<= overrides.network
1422b5d4
MT
339 ORDER BY masklen(overrides.network) DESC
340 LIMIT 1
341 ),
342 (
343 SELECT is_anycast FROM autnum_overrides overrides
bbea93a7 344 WHERE networks.autnum = overrides.number
b8e25b71
MT
345 ),
346 FALSE
e17e804e
PM
347 ) AS is_anycast,
348 COALESCE(
349 (
350 SELECT is_drop FROM network_overrides overrides
351 WHERE networks.network <<= overrides.network
352 ORDER BY masklen(overrides.network) DESC
353 LIMIT 1
354 ),
355 (
356 SELECT is_drop FROM autnum_overrides overrides
357 WHERE networks.autnum = overrides.number
358 ),
359 FALSE
360 ) AS is_drop
295a7774
MT
361 FROM
362 ordered_networks networks
0983f3dd
MT
363 """)
364
365 for row in rows:
366 network = writer.add_network(row.network)
367
5372d9c7
MT
368 # Save country
369 if row.country:
370 network.country_code = row.country
371
372 # Save ASN
373 if row.autnum:
374 network.asn = row.autnum
0983f3dd
MT
375
376 # Set flags
377 if row.is_anonymous_proxy:
378 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
379
380 if row.is_satellite_provider:
381 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
382
383 if row.is_anycast:
384 network.set_flag(location.NETWORK_FLAG_ANYCAST)
385
e17e804e
PM
386 if row.is_drop:
387 network.set_flag(location.NETWORK_FLAG_DROP)
388
8084b33a
MT
389 # Add all countries
390 log.info("Writing countries...")
391 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
392
393 for row in rows:
394 c = writer.add_country(row.country_code)
395 c.continent_code = row.continent_code
396 c.name = row.name
397
0983f3dd
MT
398 # Write everything to file
399 log.info("Writing database to file...")
400 for file in ns.file:
401 writer.write(file)
402
6ffd06b5
MT
403 def handle_update_whois(self, ns):
404 downloader = location.importer.Downloader()
405
406 # Download all sources
0365119d
MT
407 with self.db.transaction():
408 # Create some temporary tables to store parsed data
409 self.db.execute("""
426e0bee 410 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
0365119d
MT
411 ON COMMIT DROP;
412 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
413
426e0bee 414 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
0365119d
MT
415 ON COMMIT DROP;
416 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
002deb6b 417
b6b2b331 418 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
002deb6b
PM
419 ON COMMIT DROP;
420 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
421 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
422 """)
423
424 # Remove all previously imported content
425 self.db.execute("""
426 TRUNCATE TABLE networks;
0365119d
MT
427 """)
428
84b175e2
PM
429 # Fetch all valid country codes to check parsed networks aganist...
430 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
431 validcountries = []
432
433 for row in rows:
434 validcountries.append(row.country_code)
435
28b08385
PM
436 for source_key in location.importer.WHOIS_SOURCES:
437 for single_url in location.importer.WHOIS_SOURCES[source_key]:
438 with downloader.request(single_url, return_blocks=True) as f:
439 for block in f:
440 self._parse_block(block, source_key, validcountries)
6ffd06b5 441
002deb6b
PM
442 # Process all parsed networks from every RIR we happen to have access to,
443 # insert the largest network chunks into the networks table immediately...
444 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
445
446 for family in (row.family for row in families):
447 smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
448
b6b2b331
PM
449 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
450 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
002deb6b
PM
451
452 # ... determine any other prefixes for this network family, ...
453 prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
454 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
455
456 # ... and insert networks with this prefix in case they provide additional
457 # information (i. e. subnet of a larger chunk with a different country)
458 for prefix in (row.prefix for row in prefixes):
459 self.db.execute("""
460 WITH candidates AS (
461 SELECT
462 _rirdata.network,
28b08385 463 _rirdata.country,
b6b2b331 464 _rirdata.original_countries,
28b08385 465 _rirdata.source
002deb6b
PM
466 FROM
467 _rirdata
468 WHERE
469 family(_rirdata.network) = %s
470 AND
471 masklen(_rirdata.network) = %s
472 ),
473 filtered AS (
474 SELECT
475 DISTINCT ON (c.network)
476 c.network,
477 c.country,
b6b2b331 478 c.original_countries,
28b08385 479 c.source,
002deb6b
PM
480 masklen(networks.network),
481 networks.country AS parent_country
482 FROM
483 candidates c
484 LEFT JOIN
485 networks
486 ON
487 c.network << networks.network
488 ORDER BY
489 c.network,
490 masklen(networks.network) DESC NULLS LAST
491 )
492 INSERT INTO
b6b2b331 493 networks(network, country, original_countries, source)
002deb6b
PM
494 SELECT
495 network,
28b08385 496 country,
b6b2b331 497 original_countries,
28b08385 498 source
002deb6b
PM
499 FROM
500 filtered
501 WHERE
502 parent_country IS NULL
503 OR
504 country <> parent_country
505 ON CONFLICT DO NOTHING""",
506 family, prefix,
507 )
508
0365119d 509 self.db.execute("""
28b08385
PM
510 INSERT INTO autnums(number, name, source)
511 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
2cd2e342 512 JOIN _organizations ON _autnums.organization = _organizations.handle
ee6ea398 513 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
0365119d
MT
514 """)
515
429a43d1 516 # Download all extended sources
28b08385
PM
517 for source_key in location.importer.EXTENDED_SOURCES:
518 for single_url in location.importer.EXTENDED_SOURCES[source_key]:
519 with self.db.transaction():
520 # Download data
521 with downloader.request(single_url) as f:
522 for line in f:
523 self._parse_line(line, source_key, validcountries)
429a43d1 524
92403f39
PM
525 # Download and import (technical) AS names from ARIN
526 self._import_as_names_from_arin()
527
bd341642
PM
528 def _check_parsed_network(self, network):
529 """
530 Assistive function to detect and subsequently sort out parsed
531 networks from RIR data (both Whois and so-called "extended sources"),
532 which are or have...
533
534 (a) not globally routable (RFC 1918 space, et al.)
535 (b) covering a too large chunk of the IP address space (prefix length
536 is < 7 for IPv4 networks, and < 10 for IPv6)
537 (c) "0.0.0.0" or "::" as a network address
538 (d) are too small for being publicly announced (we have decided not to
539 process them at the moment, as they significantly enlarge our
540 database without providing very helpful additional information)
541
542 This unfortunately is necessary due to brain-dead clutter across
543 various RIR databases, causing mismatches and eventually disruptions.
544
545 We will return False in case a network is not suitable for adding
546 it to our database, and True otherwise.
547 """
548
549 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
550 return False
551
552 if not network.is_global:
2ba6ed07 553 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
554 return False
555
556 if network.version == 4:
557 if network.prefixlen < 7:
2ba6ed07 558 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
559 return False
560
561 if network.prefixlen > 24:
ebb087cf 562 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
563 return False
564
565 if str(network.network_address) == "0.0.0.0":
2ba6ed07 566 log.debug("Skipping network based on 0.0.0.0: %s" % network)
bd341642
PM
567 return False
568
569 elif network.version == 6:
570 if network.prefixlen < 10:
2ba6ed07 571 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
572 return False
573
574 if network.prefixlen > 48:
ebb087cf 575 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
576 return False
577
578 if str(network.network_address) == "::":
2ba6ed07 579 log.debug("Skipping network based on '::': %s" % network)
bd341642
PM
580 return False
581
582 else:
583 # This should not happen...
84187ab5 584 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
bd341642
PM
585 return False
586
587 # In case we have made it here, the network is considered to
588 # be suitable for libloc consumption...
589 return True
590
43fe570c
PM
591 def _check_parsed_asn(self, asn):
592 """
593 Assistive function to filter Autonomous System Numbers not being suitable
594 for adding to our database. Returns False in such cases, and True otherwise.
595 """
596
597 for start, end in VALID_ASN_RANGES:
598 if start <= asn and end >= asn:
599 return True
600
601 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
602 return False
603
28b08385 604 def _parse_block(self, block, source_key, validcountries = None):
6ffd06b5
MT
605 # Get first line to find out what type of block this is
606 line = block[0]
607
6ffd06b5 608 # aut-num
429a43d1 609 if line.startswith("aut-num:"):
28b08385 610 return self._parse_autnum_block(block, source_key)
6ffd06b5 611
aadac4c5
PM
612 # inetnum
613 if line.startswith("inet6num:") or line.startswith("inetnum:"):
28b08385 614 return self._parse_inetnum_block(block, source_key, validcountries)
aadac4c5 615
6ffd06b5
MT
616 # organisation
617 elif line.startswith("organisation:"):
28b08385 618 return self._parse_org_block(block, source_key)
6ffd06b5 619
28b08385 620 def _parse_autnum_block(self, block, source_key):
6ffd06b5
MT
621 autnum = {}
622 for line in block:
623 # Split line
624 key, val = split_line(line)
625
626 if key == "aut-num":
627 m = re.match(r"^(AS|as)(\d+)", val)
628 if m:
629 autnum["asn"] = m.group(2)
630
0365119d 631 elif key == "org":
e7d612e5 632 autnum[key] = val.upper()
6ffd06b5 633
426e0bee
PM
634 elif key == "descr":
635 # Save the first description line as well...
636 if not key in autnum:
637 autnum[key] = val
638
6ffd06b5 639 # Skip empty objects
426e0bee 640 if not autnum or not "asn" in autnum:
6ffd06b5
MT
641 return
642
426e0bee
PM
643 # Insert a dummy organisation handle into our temporary organisations
644 # table in case the AS does not have an organisation handle set, but
645 # has a description (a quirk often observed in APNIC area), so we can
646 # later display at least some string for this AS.
647 if not "org" in autnum:
648 if "descr" in autnum:
649 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
650
651 self.db.execute("INSERT INTO _organizations(handle, name, source) \
652 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
653 autnum.get("org"), autnum.get("descr"), source_key,
654 )
655 else:
656 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
657 autnum.get("asn"))
658 return
659
6ffd06b5 660 # Insert into database
28b08385
PM
661 self.db.execute("INSERT INTO _autnums(number, organization, source) \
662 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
0365119d 663 organization = excluded.organization",
28b08385 664 autnum.get("asn"), autnum.get("org"), source_key,
6ffd06b5
MT
665 )
666
28b08385 667 def _parse_inetnum_block(self, block, source_key, validcountries = None):
84187ab5 668 log.debug("Parsing inetnum block:")
aadac4c5
PM
669
670 inetnum = {}
671 for line in block:
84187ab5 672 log.debug(line)
aadac4c5
PM
673
674 # Split line
675 key, val = split_line(line)
676
84187ab5
PM
677 # Filter any inetnum records which are only referring to IP space
678 # not managed by that specific RIR...
679 if key == "netname":
5254e5fc 680 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
2ba6ed07 681 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
84187ab5
PM
682 return
683
aadac4c5
PM
684 if key == "inetnum":
685 start_address, delim, end_address = val.partition("-")
686
687 # Strip any excess space
688 start_address, end_address = start_address.rstrip(), end_address.strip()
689
845da577
PM
690 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
691 if start_address and not (delim or end_address):
692 try:
693 start_address = ipaddress.ip_network(start_address, strict=False)
694 except ValueError:
695 start_address = start_address.split("/")
38d0faea 696 ldigits = start_address[0].count(".")
845da577
PM
697
698 # How many octets do we need to add?
699 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
2ad36187 700 if ldigits == 1:
845da577 701 start_address = start_address[0] + ".0.0/" + start_address[1]
2ad36187 702 elif ldigits == 2:
845da577
PM
703 start_address = start_address[0] + ".0/" + start_address[1]
704 else:
705 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
706 return
707
708 try:
709 start_address = ipaddress.ip_network(start_address, strict=False)
710 except ValueError:
711 log.warning("Could not parse line in LACNIC DB format: %s" % line)
712 return
713
714 # Enumerate first and last IP address of this network
715 end_address = start_address[-1]
716 start_address = start_address[0]
717
718 else:
719 # Convert to IP address
720 try:
721 start_address = ipaddress.ip_address(start_address)
722 end_address = ipaddress.ip_address(end_address)
723 except ValueError:
724 log.warning("Could not parse line: %s" % line)
725 return
aadac4c5 726
1814283b 727 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
aadac4c5
PM
728
729 elif key == "inet6num":
1814283b 730 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
aadac4c5
PM
731
732 elif key == "country":
7434e5e0
PM
733 val = val.upper()
734
b6b2b331 735 # Catch RIR data objects with more than one country code...
7434e5e0 736 if not key in inetnum:
b6b2b331
PM
737 inetnum[key] = []
738 else:
7434e5e0 739 if val in inetnum.get("country"):
b6b2b331
PM
740 # ... but keep this list distinct...
741 continue
742
01e5f0ff
PM
743 # When people set country codes to "UK", they actually mean "GB"
744 if val == "UK":
745 val = "GB"
746
7434e5e0 747 inetnum[key].append(val)
aadac4c5
PM
748
749 # Skip empty objects
002deb6b 750 if not inetnum or not "country" in inetnum:
aadac4c5
PM
751 return
752
b6b2b331
PM
753 # Prepare skipping objects with unknown country codes...
754 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
755
1814283b
PM
756 # Iterate through all networks enumerated from above, check them for plausibility and insert
757 # them into the database, if _check_parsed_network() succeeded
758 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
759 if self._check_parsed_network(single_network):
7138b4ac 760
b6b2b331
PM
761 # Skip objects with unknown country codes if they are valid to avoid log spam...
762 if validcountries and invalidcountries:
763 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
764 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
48770ca8 765 break
7138b4ac
PM
766
767 # Everything is fine here, run INSERT statement...
b6b2b331
PM
768 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
769 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
770 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
1814283b 771 )
aadac4c5 772
28b08385 773 def _parse_org_block(self, block, source_key):
6ffd06b5
MT
774 org = {}
775 for line in block:
776 # Split line
777 key, val = split_line(line)
778
e7d612e5
PM
779 if key == "organisation":
780 org[key] = val.upper()
781 elif key == "org-name":
6ffd06b5
MT
782 org[key] = val
783
784 # Skip empty objects
785 if not org:
786 return
787
28b08385
PM
788 self.db.execute("INSERT INTO _organizations(handle, name, source) \
789 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
0365119d 790 UPDATE SET name = excluded.name",
28b08385 791 org.get("organisation"), org.get("org-name"), source_key,
6ffd06b5
MT
792 )
793
28b08385 794 def _parse_line(self, line, source_key, validcountries = None):
429a43d1
MT
795 # Skip version line
796 if line.startswith("2"):
797 return
6ffd06b5 798
429a43d1
MT
799 # Skip comments
800 if line.startswith("#"):
801 return
6ffd06b5 802
429a43d1
MT
803 try:
804 registry, country_code, type, line = line.split("|", 3)
805 except:
806 log.warning("Could not parse line: %s" % line)
807 return
6ffd06b5 808
84b175e2
PM
809 # Skip any lines that are for stats only or do not have a country
810 # code at all (avoids log spam below)
811 if not country_code or country_code == '*':
812 return
813
814 # Skip objects with unknown country codes
815 if validcountries and country_code not in validcountries:
816 log.warning("Skipping line with bogus country '%s': %s" % \
817 (country_code, line))
6ffd06b5
MT
818 return
819
429a43d1 820 if type in ("ipv6", "ipv4"):
28b08385 821 return self._parse_ip_line(country_code, type, line, source_key)
429a43d1 822
28b08385 823 def _parse_ip_line(self, country, type, line, source_key):
429a43d1
MT
824 try:
825 address, prefix, date, status, organization = line.split("|")
826 except ValueError:
827 organization = None
828
829 # Try parsing the line without organization
830 try:
831 address, prefix, date, status = line.split("|")
832 except ValueError:
833 log.warning("Unhandled line format: %s" % line)
834 return
835
836 # Skip anything that isn't properly assigned
837 if not status in ("assigned", "allocated"):
838 return
839
840 # Cast prefix into an integer
841 try:
842 prefix = int(prefix)
843 except:
844 log.warning("Invalid prefix: %s" % prefix)
7177031f 845 return
429a43d1
MT
846
847 # Fix prefix length for IPv4
848 if type == "ipv4":
849 prefix = 32 - int(math.log(prefix, 2))
850
851 # Try to parse the address
852 try:
853 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
854 except ValueError:
855 log.warning("Invalid IP address: %s" % address)
856 return
857
bd341642
PM
858 if not self._check_parsed_network(network):
859 return
860
b6b2b331
PM
861 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
862 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
87b3e102 863 UPDATE SET country = excluded.country",
b6b2b331 864 "%s" % network, country, [country], source_key,
6ffd06b5
MT
865 )
866
92403f39
PM
867 def _import_as_names_from_arin(self):
868 downloader = location.importer.Downloader()
869
870 # XXX: Download AS names file from ARIN (note that these names appear to be quite
871 # technical, not intended for human consumption, as description fields in
872 # organisation handles for other RIRs are - however, this is what we have got,
873 # and in some cases, it might be still better than nothing)
874 with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
875 for line in f:
876 # Convert binary line to string...
877 line = str(line)
878
879 # ... valid lines start with a space, followed by the number of the Autonomous System ...
880 if not line.startswith(" "):
881 continue
882
883 # Split line and check if there is a valid ASN in it...
884 asn, name = line.split()[0:2]
885
886 try:
887 asn = int(asn)
888 except ValueError:
889 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
890 continue
891
43fe570c
PM
892 # Filter invalid ASNs...
893 if not self._check_parsed_asn(asn):
92403f39
PM
894 continue
895
896 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
378b4c0a 897 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
92403f39
PM
898 continue
899
900 # Bail out in case the AS name contains anything we do not expect here...
901 if re.search(r"[^a-zA-Z0-9-_]", name):
902 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
903 (asn, name))
904
905 # Things look good here, run INSERT statement and skip this one if we already have
906 # a (better?) name for this Autonomous System...
907 self.db.execute("""
908 INSERT INTO autnums(
909 number,
910 name,
911 source
912 ) VALUES (%s, %s, %s)
913 ON CONFLICT (number) DO NOTHING""",
914 asn,
915 name,
916 "ARIN",
917 )
918
83d61c46
MT
919 def handle_update_announcements(self, ns):
920 server = ns.server[0]
921
22d8d199
MT
922 with self.db.transaction():
923 if server.startswith("/"):
924 self._handle_update_announcements_from_bird(server)
925 else:
926 self._handle_update_announcements_from_telnet(server)
927
928 # Purge anything we never want here
929 self.db.execute("""
930 -- Delete default routes
931 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
932
933 -- Delete anything that is not global unicast address space
934 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
935
936 -- DELETE "current network" address space
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
938
939 -- DELETE local loopback address space
940 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
941
942 -- DELETE RFC 1918 address space
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
946
947 -- DELETE test, benchmark and documentation address space
948 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
949 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
950 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
951 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
952 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
953
954 -- DELETE CGNAT address space (RFC 6598)
955 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
956
957 -- DELETE link local address space
958 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
959
b4d5b2a6 960 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
22d8d199 961 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
b4d5b2a6 962 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
22d8d199
MT
963
964 -- DELETE multicast and reserved address space
965 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
966 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
967
968 -- Delete networks that are too small to be in the global routing table
969 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
970 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
971
972 -- Delete any non-public or reserved ASNs
973 DELETE FROM announcements WHERE NOT (
974 (autnum >= 1 AND autnum <= 23455)
975 OR
976 (autnum >= 23457 AND autnum <= 64495)
977 OR
978 (autnum >= 131072 AND autnum <= 4199999999)
979 );
980
981 -- Delete everything that we have not seen for 14 days
982 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
983 """)
984
985 def _handle_update_announcements_from_bird(self, server):
986 # Pre-compile the regular expression for faster searching
35e5f041 987 route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
22d8d199
MT
988
989 log.info("Requesting routing table from Bird (%s)" % server)
990
35e5f041
MT
991 aggregated_networks = []
992
22d8d199
MT
993 # Send command to list all routes
994 for line in self._bird_cmd(server, "show route"):
995 m = route.match(line)
996 if not m:
b767db8d
MT
997 # Skip empty lines
998 if not line:
999 pass
1000
45fa97cd 1001 # Ignore any header lines with the name of the routing table
b767db8d
MT
1002 elif line.startswith(b"Table"):
1003 pass
45fa97cd
MT
1004
1005 # Log anything else
b767db8d
MT
1006 else:
1007 log.debug("Could not parse line: %s" % line.decode())
1008
22d8d199
MT
1009 continue
1010
1011 # Fetch the extracted network and ASN
1012 network, autnum = m.groups()
1013
35e5f041
MT
1014 # Decode into strings
1015 if network:
1016 network = network.decode()
1017 if autnum:
1018 autnum = autnum.decode()
1019
1020 # Collect all aggregated networks
1021 if not autnum:
1022 log.debug("%s is an aggregated network" % network)
1023 aggregated_networks.append(network)
1024 continue
1025
22d8d199
MT
1026 # Insert it into the database
1027 self.db.execute("INSERT INTO announcements(network, autnum) \
1028 VALUES(%s, %s) ON CONFLICT (network) DO \
1029 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
35e5f041 1030 network, autnum,
22d8d199
MT
1031 )
1032
35e5f041
MT
1033 # Process any aggregated networks
1034 for network in aggregated_networks:
1035 log.debug("Processing aggregated network %s" % network)
1036
1037 # Run "show route all" for each network
1038 for line in self._bird_cmd(server, "show route %s all" % network):
1039 # Try finding the path
1040 m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1041 if m:
1042 # Select the last AS number in the path
1043 autnum = m.group(1).decode()
1044
1045 # Insert it into the database
1046 self.db.execute("INSERT INTO announcements(network, autnum) \
1047 VALUES(%s, %s) ON CONFLICT (network) DO \
1048 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1049 network, autnum,
1050 )
1051
1052 # We don't need to process any more
1053 break
1054
22d8d199 1055 def _handle_update_announcements_from_telnet(self, server):
83d61c46 1056 # Pre-compile regular expression for routes
83d61c46
MT
1057 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
1058
1059 with telnetlib.Telnet(server) as t:
1060 # Enable debug mode
1061 #if ns.debug:
1062 # t.set_debuglevel(10)
1063
1064 # Wait for console greeting
fcd5b8b2
MT
1065 greeting = t.read_until(b"> ", timeout=30)
1066 if not greeting:
1067 log.error("Could not get a console prompt")
1068 return 1
83d61c46
MT
1069
1070 # Disable pagination
1071 t.write(b"terminal length 0\n")
1072
1073 # Wait for the prompt to return
1074 t.read_until(b"> ")
1075
1076 # Fetch the routing tables
22d8d199
MT
1077 for protocol in ("ipv6", "ipv4"):
1078 log.info("Requesting %s routing table" % protocol)
83d61c46 1079
22d8d199
MT
1080 # Request the full unicast routing table
1081 t.write(b"show bgp %s unicast\n" % protocol.encode())
83d61c46 1082
22d8d199
MT
1083 # Read entire header which ends with "Path"
1084 t.read_until(b"Path\r\n")
83d61c46 1085
22d8d199
MT
1086 while True:
1087 # Try reading a full entry
1088 # Those might be broken across multiple lines but ends with i
1089 line = t.read_until(b"i\r\n", timeout=5)
1090 if not line:
1091 break
83d61c46 1092
22d8d199
MT
1093 # Show line for debugging
1094 #log.debug(repr(line))
d773c1bc 1095
22d8d199
MT
1096 # Try finding a route in here
1097 m = route.match(line)
1098 if m:
1099 network, autnum = m.groups()
83d61c46 1100
22d8d199
MT
1101 # Convert network to string
1102 network = network.decode()
83d61c46 1103
22d8d199
MT
1104 # Append /24 for IPv4 addresses
1105 if not "/" in network and not ":" in network:
1106 network = "%s/24" % network
83d61c46 1107
22d8d199
MT
1108 # Convert AS number to integer
1109 autnum = int(autnum)
83d61c46 1110
22d8d199 1111 log.info("Found announcement for %s by %s" % (network, autnum))
83d61c46 1112
22d8d199
MT
1113 self.db.execute("INSERT INTO announcements(network, autnum) \
1114 VALUES(%s, %s) ON CONFLICT (network) DO \
1115 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1116 network, autnum,
1117 )
83d61c46 1118
22d8d199 1119 log.info("Finished reading the %s routing table" % protocol)
1d4e4e8f 1120
22d8d199
MT
1121 def _bird_cmd(self, socket_path, command):
1122 # Connect to the socket
1123 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1124 s.connect(socket_path)
cedee656 1125
22d8d199
MT
1126 # Allocate some buffer
1127 buffer = b""
83d61c46 1128
bbcefb38
MT
1129 log.debug("Sending Bird command: %s" % command)
1130
22d8d199
MT
1131 # Send the command
1132 s.send(b"%s\n" % command.encode())
209c04b6 1133
22d8d199
MT
1134 while True:
1135 # Fill up the buffer
1136 buffer += s.recv(4096)
209c04b6 1137
22d8d199
MT
1138 while True:
1139 # Search for the next newline
1140 pos = buffer.find(b"\n")
209c04b6 1141
22d8d199
MT
1142 # If we cannot find one, we go back and read more data
1143 if pos <= 0:
1144 break
209c04b6 1145
22d8d199
MT
1146 # Cut after the newline character
1147 pos += 1
b89cee80 1148
22d8d199
MT
1149 # Split the line we want and keep the rest in buffer
1150 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1151
bbcefb38
MT
1152 # Try parsing any status lines
1153 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1154 code, delim, line = int(line[:4]), line[4], line[5:]
1155
1156 log.debug("Received response code %s from bird" % code)
1157
1158 # End of output
1159 if code == 0:
1160 return
1161
1162 # Ignore hello line
1163 elif code == 1:
1164 continue
83d61c46 1165
22d8d199
MT
1166 # Otherwise return the line
1167 yield line
83d61c46 1168
d7fc3057
MT
1169 def handle_update_overrides(self, ns):
1170 with self.db.transaction():
1171 # Drop all data that we have
1172 self.db.execute("""
1173 TRUNCATE TABLE autnum_overrides;
1174 TRUNCATE TABLE network_overrides;
1175 """)
1176
dcef2ba4
PM
1177 # Update overrides for various cloud providers big enough to publish their own IP
1178 # network allocation lists in a machine-readable format...
1179 self._update_overrides_for_aws()
1180
69b3d894
PM
1181 # Update overrides for Spamhaus DROP feeds...
1182 self._update_overrides_for_spamhaus_drop()
1183
d7fc3057
MT
1184 for file in ns.files:
1185 log.info("Reading %s..." % file)
1186
1187 with open(file, "rb") as f:
1188 for type, block in location.importer.read_blocks(f):
1189 if type == "net":
1190 network = block.get("net")
1191 # Try to parse and normalise the network
1192 try:
1193 network = ipaddress.ip_network(network, strict=False)
1194 except ValueError as e:
1195 log.warning("Invalid IP network: %s: %s" % (network, e))
1196 continue
1197
94dfab8c
MT
1198 # Prevent that we overwrite all networks
1199 if network.prefixlen == 0:
1200 log.warning("Skipping %s: You cannot overwrite default" % network)
1201 continue
1202
d7fc3057
MT
1203 self.db.execute("""
1204 INSERT INTO network_overrides(
1205 network,
1206 country,
39ee3120 1207 source,
d7fc3057
MT
1208 is_anonymous_proxy,
1209 is_satellite_provider,
e17e804e
PM
1210 is_anycast,
1211 is_drop
39ee3120 1212 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
d7fc3057
MT
1213 ON CONFLICT (network) DO NOTHING""",
1214 "%s" % network,
1215 block.get("country"),
39ee3120 1216 "manual",
28d29b7c
MT
1217 self._parse_bool(block, "is-anonymous-proxy"),
1218 self._parse_bool(block, "is-satellite-provider"),
1219 self._parse_bool(block, "is-anycast"),
e17e804e 1220 self._parse_bool(block, "drop"),
d7fc3057
MT
1221 )
1222
f476cdfd
MT
1223 elif type == "aut-num":
1224 autnum = block.get("aut-num")
d7fc3057
MT
1225
1226 # Check if AS number begins with "AS"
1227 if not autnum.startswith("AS"):
1228 log.warning("Invalid AS number: %s" % autnum)
1229 continue
1230
1231 # Strip "AS"
1232 autnum = autnum[2:]
1233
1234 self.db.execute("""
1235 INSERT INTO autnum_overrides(
1236 number,
1237 name,
bd1aa6a1 1238 country,
39ee3120 1239 source,
d7fc3057
MT
1240 is_anonymous_proxy,
1241 is_satellite_provider,
e17e804e
PM
1242 is_anycast,
1243 is_drop
39ee3120 1244 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
d7fc3057 1245 ON CONFLICT DO NOTHING""",
bd1aa6a1
MT
1246 autnum,
1247 block.get("name"),
1248 block.get("country"),
39ee3120 1249 "manual",
28d29b7c
MT
1250 self._parse_bool(block, "is-anonymous-proxy"),
1251 self._parse_bool(block, "is-satellite-provider"),
1252 self._parse_bool(block, "is-anycast"),
e17e804e 1253 self._parse_bool(block, "drop"),
d7fc3057
MT
1254 )
1255
1256 else:
03d24a9b 1257 log.warning("Unsupported type: %s" % type)
d7fc3057 1258
dcef2ba4
PM
1259 def _update_overrides_for_aws(self):
1260 # Download Amazon AWS IP allocation file to create overrides...
1261 downloader = location.importer.Downloader()
1262
1263 try:
1264 with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
1265 aws_ip_dump = json.load(f.body)
1266 except Exception as e:
1267 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1268 return
1269
1270 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1271 # there seems to be no machine-readable version available of this other than
1272 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1273 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1274 # was helpful here as well.
1275 aws_region_country_map = {
1276 "af-south-1": "ZA",
1277 "ap-east-1": "HK",
1278 "ap-south-1": "IN",
1279 "ap-south-2": "IN",
1280 "ap-northeast-3": "JP",
1281 "ap-northeast-2": "KR",
1282 "ap-southeast-1": "SG",
1283 "ap-southeast-2": "AU",
1284 "ap-southeast-3": "MY",
1285 "ap-southeast-4": "AU",
1286 "ap-northeast-1": "JP",
1287 "ca-central-1": "CA",
1288 "eu-central-1": "DE",
1289 "eu-central-2": "CH",
1290 "eu-west-1": "IE",
1291 "eu-west-2": "GB",
1292 "eu-south-1": "IT",
1293 "eu-south-2": "ES",
1294 "eu-west-3": "FR",
1295 "eu-north-1": "SE",
ad017947 1296 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
dcef2ba4
PM
1297 "me-central-1": "AE",
1298 "me-south-1": "BH",
1299 "sa-east-1": "BR"
1300 }
1301
1302 # Fetch all valid country codes to check parsed networks aganist...
1303 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1304 validcountries = []
1305
1306 for row in rows:
1307 validcountries.append(row.country_code)
1308
1309 with self.db.transaction():
1310 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1311 try:
1312 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1313 except ValueError:
1314 log.warning("Unable to parse line: %s" % snetwork)
1315 continue
1316
1317 # Sanitize parsed networks...
1318 if not self._check_parsed_network(network):
1319 continue
1320
1321 # Determine region of this network...
1322 region = snetwork["region"]
1323 cc = None
1324 is_anycast = False
1325
1326 # Any region name starting with "us-" will get "US" country code assigned straight away...
1327 if region.startswith("us-"):
1328 cc = "US"
1329 elif region.startswith("cn-"):
1330 # ... same goes for China ...
1331 cc = "CN"
1332 elif region == "GLOBAL":
1333 # ... funny region name for anycast-like networks ...
1334 is_anycast = True
1335 elif region in aws_region_country_map:
1336 # ... assign looked up country code otherwise ...
1337 cc = aws_region_country_map[region]
1338 else:
1339 # ... and bail out if we are missing something here
1340 log.warning("Unable to determine country code for line: %s" % snetwork)
1341 continue
1342
1343 # Skip networks with unknown country codes
1344 if not is_anycast and validcountries and cc not in validcountries:
1345 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1346 (cc, network))
1347 return
1348
1349 # Conduct SQL statement...
1350 self.db.execute("""
1351 INSERT INTO network_overrides(
1352 network,
1353 country,
1354 source,
1355 is_anonymous_proxy,
1356 is_satellite_provider,
1357 is_anycast
1358 ) VALUES (%s, %s, %s, %s, %s, %s)
1359 ON CONFLICT (network) DO NOTHING""",
1360 "%s" % network,
1361 cc,
1362 "Amazon AWS IP feed",
1363 None,
1364 None,
1365 is_anycast,
1366 )
1367
1368
69b3d894
PM
1369 def _update_overrides_for_spamhaus_drop(self):
1370 downloader = location.importer.Downloader()
1371
1372 ip_urls = [
1373 "https://www.spamhaus.org/drop/drop.txt",
1374 "https://www.spamhaus.org/drop/edrop.txt",
1375 "https://www.spamhaus.org/drop/dropv6.txt"
1376 ]
1377
1378 asn_urls = [
1379 "https://www.spamhaus.org/drop/asndrop.txt"
1380 ]
1381
1382 for url in ip_urls:
1383 try:
1384 with downloader.request(url, return_blocks=False) as f:
1385 fcontent = f.body.readlines()
1386 except Exception as e:
1387 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1388 return
1389
1390 # Iterate through every line, filter comments and add remaining networks to
1391 # the override table in case they are valid...
1392 with self.db.transaction():
1393 for sline in fcontent:
1394
1395 # The response is assumed to be encoded in UTF-8...
1396 sline = sline.decode("utf-8")
1397
1398 # Comments start with a semicolon...
1399 if sline.startswith(";"):
1400 continue
1401
1402 # Extract network and ignore anything afterwards...
1403 try:
1404 network = ipaddress.ip_network(sline.split()[0], strict=False)
1405 except ValueError:
1406 log.error("Unable to parse line: %s" % sline)
1407 continue
1408
1409 # Sanitize parsed networks...
1410 if not self._check_parsed_network(network):
1411 log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1412 (url, network))
1413 continue
1414
1415 # Conduct SQL statement...
1416 self.db.execute("""
1417 INSERT INTO network_overrides(
1418 network,
1419 source,
1420 is_drop
1421 ) VALUES (%s, %s, %s)
148f68bf 1422 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
69b3d894
PM
1423 "%s" % network,
1424 "Spamhaus DROP lists",
1425 True
1426 )
1427
1428 for url in asn_urls:
1429 try:
1430 with downloader.request(url, return_blocks=False) as f:
1431 fcontent = f.body.readlines()
1432 except Exception as e:
1433 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1434 return
1435
1436 # Iterate through every line, filter comments and add remaining ASNs to
1437 # the override table in case they are valid...
1438 with self.db.transaction():
1439 for sline in fcontent:
1440
1441 # The response is assumed to be encoded in UTF-8...
1442 sline = sline.decode("utf-8")
1443
1444 # Comments start with a semicolon...
1445 if sline.startswith(";"):
1446 continue
1447
1448 # Throw away anything after the first space...
1449 sline = sline.split()[0]
1450
1451 # ... strip the "AS" prefix from it ...
1452 sline = sline.strip("AS")
1453
1454 # ... and convert it into an integer. Voila.
1455 asn = int(sline)
1456
1457 # Filter invalid ASNs...
1458 if not self._check_parsed_asn(asn):
1459 log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1460 (url, asn))
1461 continue
1462
1463 # Conduct SQL statement...
1464 self.db.execute("""
1465 INSERT INTO autnum_overrides(
1466 number,
1467 source,
1468 is_drop
1469 ) VALUES (%s, %s, %s)
148f68bf 1470 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
69b3d894
PM
1471 "%s" % asn,
1472 "Spamhaus ASN-DROP list",
1473 True
1474 )
1475
28d29b7c
MT
1476 @staticmethod
1477 def _parse_bool(block, key):
1478 val = block.get(key)
1479
1480 # There is no point to proceed when we got None
1481 if val is None:
1482 return
1483
1484 # Convert to lowercase
1485 val = val.lower()
1486
1487 # True
1488 if val in ("yes", "1"):
1489 return True
1490
1491 # False
1492 if val in ("no", "0"):
1493 return False
1494
1495 # Default to None
1496 return None
1497
8084b33a
MT
1498 def handle_import_countries(self, ns):
1499 with self.db.transaction():
1500 # Drop all data that we have
1501 self.db.execute("TRUNCATE TABLE countries")
1502
1503 for file in ns.file:
1504 for line in file:
1505 line = line.rstrip()
1506
1507 # Ignore any comments
1508 if line.startswith("#"):
1509 continue
1510
1511 try:
1512 country_code, continent_code, name = line.split(maxsplit=2)
1513 except:
1514 log.warning("Could not parse line: %s" % line)
1515 continue
1516
1517 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1518 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1519
6ffd06b5
MT
1520
1521def split_line(line):
1522 key, colon, val = line.partition(":")
1523
1524 # Strip any excess space
1525 key = key.strip()
1526 val = val.strip()
78ff0cf2 1527
6ffd06b5 1528 return key, val
78ff0cf2
MT
1529
1530def main():
1531 # Run the command line interface
1532 c = CLI()
1533 c.run()
1534
1535main()