]> git.ipfire.org Git - people/ms/libloc.git/blame - src/scripts/location-importer.in
importer: Log any errors to the database
[people/ms/libloc.git] / src / scripts / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
2ad36187 6# Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
0fcdd689 21import concurrent.futures
6ffd06b5 22import ipaddress
dcef2ba4 23import json
78ff0cf2 24import logging
6ffd06b5
MT
25import math
26import re
22d8d199 27import socket
78ff0cf2 28import sys
3ce28dea 29import urllib.error
78ff0cf2
MT
30
31# Load our location module
32import location
29c6fa22 33import location.database
3192b66c 34import location.importer
78ff0cf2
MT
35from location.i18n import _
36
# Initialise logging
# All messages are emitted through the shared "location.importer" logger;
# propagate=1 lets them bubble up to libloc's root logger configuration.
log = logging.getLogger("location.importer")
log.propagate = 1

# Define constants

# Inclusive (start, end) ranges of ASNs considered publicly routable.
# Note the deliberate gaps: 23456 (presumably AS_TRANS — TODO confirm) and
# 64496-131071 (documentation/reserved/private blocks) are excluded, as is
# everything above 4199999999.
VALID_ASN_RANGES = (
	(1, 23455),
	(23457, 64495),
	(131072, 4199999999),
)
47
48
78ff0cf2
MT
49class CLI(object):
	def parse_cli(self):
		"""
		Build the argument parser, parse sys.argv, configure logging from
		the global flags and return the parsed namespace.

		Exits with status 2 (after printing usage) when no sub-command
		was given; every sub-command registers its handler via
		set_defaults(func=...), which run() later dispatches to.
		"""
		parser = argparse.ArgumentParser(
			description=_("Location Importer Command Line Interface"),
		)
		subparsers = parser.add_subparsers()

		# Global configuration flags
		parser.add_argument("--debug", action="store_true",
			help=_("Enable debug output"))
		parser.add_argument("--quiet", action="store_true",
			help=_("Enable quiet mode"))

		# version ("@VERSION@" is substituted at build time)
		parser.add_argument("--version", action="version",
			version="%(prog)s @VERSION@")

		# Database (all connection parameters are mandatory)
		parser.add_argument("--database-host", required=True,
			help=_("Database Hostname"), metavar=_("HOST"))
		parser.add_argument("--database-name", required=True,
			help=_("Database Name"), metavar=_("NAME"))
		parser.add_argument("--database-username", required=True,
			help=_("Database Username"), metavar=_("USERNAME"))
		parser.add_argument("--database-password", required=True,
			help=_("Database Password"), metavar=_("PASSWORD"))

		# Write Database
		write = subparsers.add_parser("write", help=_("Write database to file"))
		write.set_defaults(func=self.handle_write)
		write.add_argument("file", nargs=1, help=_("Database File"))
		write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
		write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
		write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
		write.add_argument("--description", nargs="?", help=_("Sets a description"))
		write.add_argument("--license", nargs="?", help=_("Sets the license"))
		write.add_argument("--version", type=int, help=_("Database Format Version"))

		# Update WHOIS
		update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
		update_whois.set_defaults(func=self.handle_update_whois)

		# Update announcements
		update_announcements = subparsers.add_parser("update-announcements",
			help=_("Update BGP Annoucements"))
		update_announcements.set_defaults(func=self.handle_update_announcements)
		update_announcements.add_argument("server", nargs=1,
			help=_("Route Server to connect to"), metavar=_("SERVER"))

		# Update geofeeds
		update_geofeeds = subparsers.add_parser("update-geofeeds",
			help=_("Update Geofeeds"))
		update_geofeeds.set_defaults(func=self.handle_update_geofeeds)

		# Update overrides
		update_overrides = subparsers.add_parser("update-overrides",
			help=_("Update overrides"),
		)
		update_overrides.add_argument(
			"files", nargs="+", help=_("Files to import"),
		)
		update_overrides.set_defaults(func=self.handle_update_overrides)

		# Import countries
		import_countries = subparsers.add_parser("import-countries",
			help=_("Import countries"),
		)
		import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
			help=_("File to import"))
		import_countries.set_defaults(func=self.handle_import_countries)

		args = parser.parse_args()

		# Configure logging (--debug wins over --quiet)
		if args.debug:
			location.logger.set_level(logging.DEBUG)
		elif args.quiet:
			location.logger.set_level(logging.WARNING)

		# Print usage if no action was given
		if not "func" in args:
			parser.print_usage()
			sys.exit(2)

		return args
134
135 def run(self):
136 # Parse command line arguments
137 args = self.parse_cli()
138
29c6fa22 139 # Initialise database
6ffd06b5 140 self.db = self._setup_database(args)
29c6fa22 141
78ff0cf2 142 # Call function
6ffd06b5 143 ret = args.func(args)
78ff0cf2
MT
144
145 # Return with exit code
146 if ret:
147 sys.exit(ret)
148
149 # Otherwise just exit
150 sys.exit(0)
151
29c6fa22
MT
	def _setup_database(self, ns):
		"""
		Initialise the database.

		Connects to PostgreSQL using the credentials from the parsed
		command-line namespace, then (idempotently) creates/migrates the
		schema inside a single transaction. Returns the open connection.
		"""
		# Connect to database
		db = location.database.Connection(
			host=ns.database_host, database=ns.database_name,
			user=ns.database_username, password=ns.database_password,
		)

		# The schema below uses IF NOT EXISTS / ADD COLUMN IF NOT EXISTS
		# throughout so it doubles as a lightweight migration mechanism.
		with db.transaction():
			db.execute("""
				-- announcements (BGP feed; GiST index enables <<= containment search)
				CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
					first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
					last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
				CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
				CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
				CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

				-- autnums
				CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
				ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

				-- countries
				CREATE TABLE IF NOT EXISTS countries(
					country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

				-- networks (RIR data; original_countries keeps the full list from the RIR object)
				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

				-- geofeeds (fetched feeds; error column stores the last fetch failure)
				CREATE TABLE IF NOT EXISTS geofeeds(
					id serial primary key,
					url text,
					status integer default null,
					updated_at timestamp without time zone default null
				);
				ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
				CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
					ON geofeeds(url);
				CREATE TABLE IF NOT EXISTS geofeed_networks(
					geofeed_id integer references geofeeds(id) on delete cascade,
					network inet,
					country text,
					region text,
					city text
				);
				CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
					ON geofeed_networks(geofeed_id);
				CREATE INDEX IF NOT EXISTS geofeed_networks_search
					ON geofeed_networks USING GIST(network inet_ops);
				CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
				CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
					ON network_geofeeds(network);
				CREATE INDEX IF NOT EXISTS network_geofeeds_search
					ON network_geofeeds USING GIST(network inet_ops);
				CREATE INDEX IF NOT EXISTS network_geofeeds_url
					ON network_geofeeds(url);

				-- overrides (manually curated data that takes precedence on write)
				CREATE TABLE IF NOT EXISTS autnum_overrides(
					number bigint NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
					ON autnum_overrides(number);
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;

				CREATE TABLE IF NOT EXISTS network_overrides(
					network inet NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
					ON network_overrides(network);
				CREATE INDEX IF NOT EXISTS network_overrides_search
					ON network_overrides USING GIST(network inet_ops);
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
			""")

		return db
249
0983f3dd
MT
	def handle_write(self, ns):
		"""
		Compiles a database in libloc format out of what is in the database.

		Writes ASes, networks (with country/flags resolved by precedence:
		network override > AS override > geofeed > RIR data) and countries,
		then serialises the result to every file given on the command line.
		"""
		# Allocate a writer (signing keys may be None)
		writer = location.Writer(ns.signing_key, ns.backup_signing_key)

		# Set all metadata
		if ns.vendor:
			writer.vendor = ns.vendor

		if ns.description:
			writer.description = ns.description

		if ns.license:
			writer.license = ns.license

		# Add all Autonomous Systems
		log.info("Writing Autonomous Systems...")

		# Select all ASes with a name; an override name wins over the
		# imported one, and nameless ASes (empty string) are skipped
		rows = self.db.query("""
			SELECT
				autnums.number AS number,
				COALESCE(
					(SELECT overrides.name FROM autnum_overrides overrides
						WHERE overrides.number = autnums.number),
					autnums.name
				) AS name
			FROM autnums
			WHERE name <> %s ORDER BY number
			""", "")

		for row in rows:
			a = writer.add_as(row.number)
			a.name = row.name

		# Add all networks
		log.info("Writing networks...")

		# Select all known networks. known_networks collects every subnet we
		# have seen anywhere; ordered_networks joins each one with its most
		# specific covering announcement and RIR network (the masklen sort
		# columns + DISTINCT ON pick the longest prefix match).
		rows = self.db.query("""
			WITH known_networks AS (
				SELECT network FROM announcements
				UNION
				SELECT network FROM networks
				UNION
				SELECT network FROM network_overrides
				UNION
				SELECT network FROM geofeed_networks
			),

			ordered_networks AS (
				SELECT
					known_networks.network AS network,
					announcements.autnum AS autnum,
					networks.country AS country,

					-- Must be part of returned values for ORDER BY clause
					masklen(announcements.network) AS sort_a,
					masklen(networks.network) AS sort_b
				FROM
					known_networks
				LEFT JOIN
					announcements ON known_networks.network <<= announcements.network
				LEFT JOIN
					networks ON known_networks.network <<= networks.network
				ORDER BY
					known_networks.network,
					sort_a DESC,
					sort_b DESC
			)

			-- Return a list of those networks enriched with all
			-- other information that we store in the database
			SELECT
				DISTINCT ON (network)
				network,
				autnum,

				-- Country (precedence: network override, AS override,
				-- geofeed, RIR data)
				COALESCE(
					(
						SELECT country FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT country FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
					),
					(
						SELECT
							geofeed_networks.country AS country
						FROM
							network_geofeeds

						-- Join the data from the geofeeds
						LEFT JOIN
							geofeeds ON network_geofeeds.url = geofeeds.url
						LEFT JOIN
							geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id

						-- Check whether we have a geofeed for this network
						WHERE
							networks.network <<= network_geofeeds.network
						AND
							networks.network <<= geofeed_networks.network

						-- Filter for the best result
						ORDER BY
							masklen(geofeed_networks.network) DESC
						LIMIT 1
					),
					networks.country
				) AS country,

				-- Flags (same precedence; default FALSE)
				COALESCE(
					(
						SELECT is_anonymous_proxy FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_anonymous_proxy FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
					),
					FALSE
				) AS is_anonymous_proxy,
				COALESCE(
					(
						SELECT is_satellite_provider FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_satellite_provider FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
					),
					FALSE
				) AS is_satellite_provider,
				COALESCE(
					(
						SELECT is_anycast FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_anycast FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
					),
					FALSE
				) AS is_anycast,
				COALESCE(
					(
						SELECT is_drop FROM network_overrides overrides
						WHERE networks.network <<= overrides.network
						ORDER BY masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT is_drop FROM autnum_overrides overrides
						WHERE networks.autnum = overrides.number
					),
					FALSE
				) AS is_drop
			FROM
				ordered_networks networks
			""")

		for row in rows:
			network = writer.add_network(row.network)

			# Save country
			if row.country:
				network.country_code = row.country

			# Save ASN
			if row.autnum:
				network.asn = row.autnum

			# Set flags
			if row.is_anonymous_proxy:
				network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

			if row.is_satellite_provider:
				network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

			if row.is_anycast:
				network.set_flag(location.NETWORK_FLAG_ANYCAST)

			if row.is_drop:
				network.set_flag(location.NETWORK_FLAG_DROP)

		# Add all countries
		log.info("Writing countries...")
		rows = self.db.query("SELECT * FROM countries ORDER BY country_code")

		for row in rows:
			c = writer.add_country(row.country_code)
			c.continent_code = row.continent_code
			c.name = row.name

		# Write everything to file
		log.info("Writing database to file...")
		for file in ns.file:
			writer.write(file)
462
6ffd06b5
MT
	def handle_update_whois(self, ns):
		"""
		Download and import WHOIS and extended delegation data from all
		configured RIR sources.

		Each source is processed in its own transaction: parsed objects are
		staged in temporary tables (_autnums, _organizations, _rirdata) that
		are dropped on commit, then merged into the permanent tables so that
		more specific subnets only get stored when they add information over
		their parent network. Returns 1 if any source failed, 0 otherwise.
		"""
		downloader = location.importer.Downloader()

		# Did we run successfully?
		error = False

		# Fetch all valid country codes to check parsed networks against
		# (self.countries is provided elsewhere in this class — presumably a
		# cached list/set of ISO codes from the countries table; verify there)
		validcountries = self.countries

		# Iterate over all potential sources
		for source in sorted(location.importer.SOURCES):
			with self.db.transaction():
				# Create some temporary tables to store parsed data
				self.db.execute("""
					CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
						organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
					CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

					CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
						name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
					CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

					CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
						original_countries text[] NOT NULL, source text NOT NULL)
						ON COMMIT DROP;
					CREATE INDEX _rirdata_search ON _rirdata
						USING BTREE(family(network), masklen(network));
					CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
					""")

				# Remove all previously imported content
				self.db.execute("DELETE FROM networks WHERE source = %s", source)

				try:
					# Fetch WHOIS sources
					for url in location.importer.WHOIS_SOURCES.get(source, []):
						for block in downloader.request_blocks(url):
							self._parse_block(block, source, validcountries)

					# Fetch extended sources
					for url in location.importer.EXTENDED_SOURCES.get(source, []):
						for line in downloader.request_lines(url):
							self._parse_line(line, source, validcountries)
				except urllib.error.URLError as e:
					log.error("Could not retrieve data from %s: %s" % (source, e))
					error = True

					# Continue with the next source
					# (the transaction context decides the fate of the
					# partially staged data; temp tables drop on commit)
					continue

				# Process all parsed networks from every RIR we happen to have access to,
				# insert the largest network chunks into the networks table immediately...
				families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata \
					ORDER BY family(network)")

				for family in (row.family for row in families):
					# Fetch the smallest mask length in our data set
					smallest = self.db.get("""
						SELECT
							MIN(
								masklen(network)
							) AS prefix
						FROM
							_rirdata
						WHERE
							family(network) = %s""",
						family,
					)

					# Copy all networks with the smallest prefix (i.e. the
					# largest chunks) verbatim — they have no parent to compare to
					self.db.execute("""
						INSERT INTO
							networks
						(
							network,
							country,
							original_countries,
							source
						)
						SELECT
							network,
							country,
							original_countries,
							source
						FROM
							_rirdata
						WHERE
							masklen(network) = %s
						AND
							family(network) = %s
						ON CONFLICT DO
							NOTHING""",
						smallest.prefix,
						family,
					)

					# ... determine any other prefixes for this network family, ...
					prefixes = self.db.query("""
						SELECT
							DISTINCT masklen(network) AS prefix
						FROM
							_rirdata
						WHERE
							family(network) = %s
						ORDER BY
							masklen(network) ASC
						OFFSET 1""",
						family,
					)

					# ... and insert networks with this prefix in case they provide additional
					# information (i. e. subnet of a larger chunk with a different country)
					for prefix in (row.prefix for row in prefixes):
						self.db.execute("""
							WITH candidates AS (
								SELECT
									_rirdata.network,
									_rirdata.country,
									_rirdata.original_countries,
									_rirdata.source
								FROM
									_rirdata
								WHERE
									family(_rirdata.network) = %s
								AND
									masklen(_rirdata.network) = %s
							),
							filtered AS (
								SELECT
									DISTINCT ON (c.network)
									c.network,
									c.country,
									c.original_countries,
									c.source,
									masklen(networks.network),
									networks.country AS parent_country
								FROM
									candidates c
								LEFT JOIN
									networks
								ON
									c.network << networks.network
								ORDER BY
									c.network,
									masklen(networks.network) DESC NULLS LAST
							)
							INSERT INTO
								networks(network, country, original_countries, source)
							SELECT
								network,
								country,
								original_countries,
								source
							FROM
								filtered
							WHERE
								parent_country IS NULL
							OR
								country <> parent_country
							ON CONFLICT DO NOTHING""",
							family, prefix,
						)

				# Merge staged AS data; a re-imported AS keeps its number but
				# gets the freshly parsed organisation name
				self.db.execute("""
					INSERT INTO autnums(number, name, source)
					SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
						JOIN _organizations ON _autnums.organization = _organizations.handle
					ON CONFLICT (number) DO UPDATE SET name = excluded.name;
					""")

		# Download and import (technical) AS names from ARIN
		with self.db.transaction():
			self._import_as_names_from_arin()

		# Return a non-zero exit code for errors
		return 1 if error else 0
639
bd341642
PM
640 def _check_parsed_network(self, network):
641 """
642 Assistive function to detect and subsequently sort out parsed
643 networks from RIR data (both Whois and so-called "extended sources"),
644 which are or have...
645
646 (a) not globally routable (RFC 1918 space, et al.)
647 (b) covering a too large chunk of the IP address space (prefix length
648 is < 7 for IPv4 networks, and < 10 for IPv6)
649 (c) "0.0.0.0" or "::" as a network address
650 (d) are too small for being publicly announced (we have decided not to
651 process them at the moment, as they significantly enlarge our
652 database without providing very helpful additional information)
653
654 This unfortunately is necessary due to brain-dead clutter across
655 various RIR databases, causing mismatches and eventually disruptions.
656
657 We will return False in case a network is not suitable for adding
658 it to our database, and True otherwise.
659 """
660
661 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
662 return False
663
664 if not network.is_global:
2ba6ed07 665 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
666 return False
667
668 if network.version == 4:
669 if network.prefixlen < 7:
2ba6ed07 670 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
671 return False
672
673 if network.prefixlen > 24:
ebb087cf 674 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
675 return False
676
677 if str(network.network_address) == "0.0.0.0":
2ba6ed07 678 log.debug("Skipping network based on 0.0.0.0: %s" % network)
bd341642
PM
679 return False
680
681 elif network.version == 6:
682 if network.prefixlen < 10:
2ba6ed07 683 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
684 return False
685
686 if network.prefixlen > 48:
ebb087cf 687 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
688 return False
689
690 if str(network.network_address) == "::":
2ba6ed07 691 log.debug("Skipping network based on '::': %s" % network)
bd341642
PM
692 return False
693
694 else:
695 # This should not happen...
84187ab5 696 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
bd341642
PM
697 return False
698
699 # In case we have made it here, the network is considered to
700 # be suitable for libloc consumption...
701 return True
702
43fe570c
PM
703 def _check_parsed_asn(self, asn):
704 """
705 Assistive function to filter Autonomous System Numbers not being suitable
706 for adding to our database. Returns False in such cases, and True otherwise.
707 """
708
709 for start, end in VALID_ASN_RANGES:
710 if start <= asn and end >= asn:
711 return True
712
713 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
714 return False
715
28b08385 716 def _parse_block(self, block, source_key, validcountries = None):
6ffd06b5
MT
717 # Get first line to find out what type of block this is
718 line = block[0]
719
6ffd06b5 720 # aut-num
429a43d1 721 if line.startswith("aut-num:"):
28b08385 722 return self._parse_autnum_block(block, source_key)
6ffd06b5 723
aadac4c5
PM
724 # inetnum
725 if line.startswith("inet6num:") or line.startswith("inetnum:"):
28b08385 726 return self._parse_inetnum_block(block, source_key, validcountries)
aadac4c5 727
6ffd06b5
MT
728 # organisation
729 elif line.startswith("organisation:"):
28b08385 730 return self._parse_org_block(block, source_key)
6ffd06b5 731
	def _parse_autnum_block(self, block, source_key):
		"""
		Parse one aut-num object from a WHOIS dump and stage it in the
		temporary _autnums table (and, when necessary, _organizations).
		"""
		autnum = {}
		for line in block:
			# Split line
			key, val = split_line(line)

			if key == "aut-num":
				# Only "AS"/"as" prefixes are accepted; the numeric part is
				# kept as a string
				m = re.match(r"^(AS|as)(\d+)", val)
				if m:
					autnum["asn"] = m.group(2)

			elif key == "org":
				autnum[key] = val.upper()

			elif key == "descr":
				# Save the first description line as well...
				if not key in autnum:
					autnum[key] = val

		# Skip empty objects
		if not autnum or not "asn" in autnum:
			return

		# Insert a dummy organisation handle into our temporary organisations
		# table in case the AS does not have an organisation handle set, but
		# has a description (a quirk often observed in APNIC area), so we can
		# later display at least some string for this AS.
		if not "org" in autnum:
			if "descr" in autnum:
				autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")

				self.db.execute("INSERT INTO _organizations(handle, name, source) \
					VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
					autnum.get("org"), autnum.get("descr"), source_key,
				)
			else:
				# Without either attribute there is nothing useful to show
				log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
					autnum.get("asn"))
				return

		# Insert into database
		self.db.execute("INSERT INTO _autnums(number, organization, source) \
			VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
			organization = excluded.organization",
			autnum.get("asn"), autnum.get("org"), source_key,
		)
778
28b08385 779 def _parse_inetnum_block(self, block, source_key, validcountries = None):
84187ab5 780 log.debug("Parsing inetnum block:")
aadac4c5
PM
781
782 inetnum = {}
783 for line in block:
84187ab5 784 log.debug(line)
aadac4c5
PM
785
786 # Split line
787 key, val = split_line(line)
788
84187ab5
PM
789 # Filter any inetnum records which are only referring to IP space
790 # not managed by that specific RIR...
791 if key == "netname":
5254e5fc 792 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
2ba6ed07 793 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
84187ab5
PM
794 return
795
aadac4c5
PM
796 if key == "inetnum":
797 start_address, delim, end_address = val.partition("-")
798
799 # Strip any excess space
800 start_address, end_address = start_address.rstrip(), end_address.strip()
801
845da577
PM
802 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
803 if start_address and not (delim or end_address):
804 try:
805 start_address = ipaddress.ip_network(start_address, strict=False)
806 except ValueError:
807 start_address = start_address.split("/")
38d0faea 808 ldigits = start_address[0].count(".")
845da577
PM
809
810 # How many octets do we need to add?
811 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
2ad36187 812 if ldigits == 1:
845da577 813 start_address = start_address[0] + ".0.0/" + start_address[1]
2ad36187 814 elif ldigits == 2:
845da577
PM
815 start_address = start_address[0] + ".0/" + start_address[1]
816 else:
817 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
818 return
819
820 try:
821 start_address = ipaddress.ip_network(start_address, strict=False)
822 except ValueError:
823 log.warning("Could not parse line in LACNIC DB format: %s" % line)
824 return
825
826 # Enumerate first and last IP address of this network
827 end_address = start_address[-1]
828 start_address = start_address[0]
829
830 else:
831 # Convert to IP address
832 try:
833 start_address = ipaddress.ip_address(start_address)
834 end_address = ipaddress.ip_address(end_address)
835 except ValueError:
836 log.warning("Could not parse line: %s" % line)
837 return
aadac4c5 838
1814283b 839 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
aadac4c5
PM
840
841 elif key == "inet6num":
1814283b 842 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
aadac4c5
PM
843
844 elif key == "country":
7434e5e0
PM
845 val = val.upper()
846
b6b2b331 847 # Catch RIR data objects with more than one country code...
7434e5e0 848 if not key in inetnum:
b6b2b331
PM
849 inetnum[key] = []
850 else:
7434e5e0 851 if val in inetnum.get("country"):
b6b2b331
PM
852 # ... but keep this list distinct...
853 continue
854
01e5f0ff
PM
855 # When people set country codes to "UK", they actually mean "GB"
856 if val == "UK":
857 val = "GB"
858
7434e5e0 859 inetnum[key].append(val)
aadac4c5 860
183b2f74
MT
861 # Parse the geofeed attribute
862 elif key == "geofeed":
863 inetnum["geofeed"] = val
864
865 # Parse geofeed when used as a remark
45c0536c
MT
866 elif key == "remarks":
867 m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
183b2f74
MT
868 if m:
869 inetnum["geofeed"] = m.group(1)
870
aadac4c5 871 # Skip empty objects
002deb6b 872 if not inetnum or not "country" in inetnum:
aadac4c5
PM
873 return
874
b6b2b331
PM
875 # Prepare skipping objects with unknown country codes...
876 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
877
1814283b
PM
878 # Iterate through all networks enumerated from above, check them for plausibility and insert
879 # them into the database, if _check_parsed_network() succeeded
880 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
881 if self._check_parsed_network(single_network):
b6b2b331
PM
882 # Skip objects with unknown country codes if they are valid to avoid log spam...
883 if validcountries and invalidcountries:
884 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
885 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
48770ca8 886 break
7138b4ac
PM
887
888 # Everything is fine here, run INSERT statement...
b6b2b331
PM
889 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
890 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
891 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
1814283b 892 )
aadac4c5 893
183b2f74
MT
894 # Update any geofeed information
895 geofeed = inetnum.get("geofeed", None)
896
d96702a2
MT
897 # Make sure that this is a HTTPS URL
898 if geofeed and not geofeed.startswith("https://"):
899 log.warning("Geofeed URL is not using HTTPS: %s" % geofeed)
900 geofeed = None
901
183b2f74
MT
902 # Store/update any geofeeds
903 if geofeed:
904 self.db.execute("""
905 INSERT INTO
906 network_geofeeds(
907 network,
908 url
909 )
910 VALUES(
911 %s, %s
912 )
913 ON CONFLICT (network) DO
914 UPDATE SET url = excluded.url""",
915 "%s" % single_network, geofeed,
916 )
917
918 # Delete any previous geofeeds
919 else:
920 self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
921 "%s" % single_network)
922
28b08385 923 def _parse_org_block(self, block, source_key):
6ffd06b5
MT
924 org = {}
925 for line in block:
926 # Split line
927 key, val = split_line(line)
928
e7d612e5
PM
929 if key == "organisation":
930 org[key] = val.upper()
931 elif key == "org-name":
6ffd06b5
MT
932 org[key] = val
933
934 # Skip empty objects
935 if not org:
936 return
937
28b08385
PM
938 self.db.execute("INSERT INTO _organizations(handle, name, source) \
939 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
0365119d 940 UPDATE SET name = excluded.name",
28b08385 941 org.get("organisation"), org.get("org-name"), source_key,
6ffd06b5
MT
942 )
943
28b08385 944 def _parse_line(self, line, source_key, validcountries = None):
429a43d1
MT
945 # Skip version line
946 if line.startswith("2"):
947 return
6ffd06b5 948
429a43d1
MT
949 # Skip comments
950 if line.startswith("#"):
951 return
6ffd06b5 952
429a43d1
MT
953 try:
954 registry, country_code, type, line = line.split("|", 3)
955 except:
956 log.warning("Could not parse line: %s" % line)
957 return
6ffd06b5 958
84b175e2
PM
959 # Skip any lines that are for stats only or do not have a country
960 # code at all (avoids log spam below)
961 if not country_code or country_code == '*':
962 return
963
964 # Skip objects with unknown country codes
965 if validcountries and country_code not in validcountries:
966 log.warning("Skipping line with bogus country '%s': %s" % \
967 (country_code, line))
6ffd06b5
MT
968 return
969
429a43d1 970 if type in ("ipv6", "ipv4"):
28b08385 971 return self._parse_ip_line(country_code, type, line, source_key)
429a43d1 972
28b08385 973 def _parse_ip_line(self, country, type, line, source_key):
429a43d1
MT
974 try:
975 address, prefix, date, status, organization = line.split("|")
976 except ValueError:
977 organization = None
978
979 # Try parsing the line without organization
980 try:
981 address, prefix, date, status = line.split("|")
982 except ValueError:
983 log.warning("Unhandled line format: %s" % line)
984 return
985
986 # Skip anything that isn't properly assigned
987 if not status in ("assigned", "allocated"):
988 return
989
990 # Cast prefix into an integer
991 try:
992 prefix = int(prefix)
993 except:
994 log.warning("Invalid prefix: %s" % prefix)
7177031f 995 return
429a43d1
MT
996
997 # Fix prefix length for IPv4
998 if type == "ipv4":
999 prefix = 32 - int(math.log(prefix, 2))
1000
1001 # Try to parse the address
1002 try:
1003 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
1004 except ValueError:
1005 log.warning("Invalid IP address: %s" % address)
1006 return
1007
bd341642
PM
1008 if not self._check_parsed_network(network):
1009 return
1010
b6b2b331
PM
1011 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
1012 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
87b3e102 1013 UPDATE SET country = excluded.country",
b6b2b331 1014 "%s" % network, country, [country], source_key,
6ffd06b5
MT
1015 )
1016
92403f39
PM
1017 def _import_as_names_from_arin(self):
1018 downloader = location.importer.Downloader()
1019
1020 # XXX: Download AS names file from ARIN (note that these names appear to be quite
1021 # technical, not intended for human consumption, as description fields in
1022 # organisation handles for other RIRs are - however, this is what we have got,
1023 # and in some cases, it might be still better than nothing)
85e44d9a
MT
1024 for line in downloader.request_lines("https://ftp.arin.net/info/asn.txt"):
1025 # Valid lines start with a space, followed by the number of the Autonomous System ...
1026 if not line.startswith(" "):
1027 continue
92403f39 1028
85e44d9a
MT
1029 # Split line and check if there is a valid ASN in it...
1030 asn, name = line.split()[0:2]
92403f39 1031
85e44d9a
MT
1032 try:
1033 asn = int(asn)
1034 except ValueError:
1035 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
1036 continue
92403f39 1037
85e44d9a
MT
1038 # Filter invalid ASNs...
1039 if not self._check_parsed_asn(asn):
1040 continue
92403f39 1041
85e44d9a
MT
1042 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
1043 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
1044 continue
92403f39 1045
85e44d9a
MT
1046 # Bail out in case the AS name contains anything we do not expect here...
1047 if re.search(r"[^a-zA-Z0-9-_]", name):
1048 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
1049 (asn, name))
92403f39 1050
85e44d9a
MT
1051 # Things look good here, run INSERT statement and skip this one if we already have
1052 # a (better?) name for this Autonomous System...
1053 self.db.execute("""
1054 INSERT INTO autnums(
1055 number,
92403f39 1056 name,
85e44d9a
MT
1057 source
1058 ) VALUES (%s, %s, %s)
1059 ON CONFLICT (number) DO NOTHING""",
1060 asn,
1061 name,
1062 "ARIN",
1063 )
92403f39 1064
83d61c46
MT
	def handle_update_announcements(self, ns):
		"""
			Handler for "update-announcements": imports the current routing
			table from a local Bird instance and prunes entries that must
			never appear in the database.

			ns.server[0] is the Bird control socket path (a leading "/"
			selects the Bird backend; other transports are not handled here).
		"""
		server = ns.server[0]

		# All import and cleanup steps run inside one transaction, so a
		# failed import leaves the previous table intact
		with self.db.transaction():
			if server.startswith("/"):
				self._handle_update_announcements_from_bird(server)

			# Purge anything we never want here
			self.db.execute("""
				-- Delete default routes
				DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

				-- Delete anything that is not global unicast address space
				DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

				-- DELETE "current network" address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

				-- DELETE local loopback address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

				-- DELETE RFC 1918 address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

				-- DELETE test, benchmark and documentation address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

				-- DELETE CGNAT address space (RFC 6598)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

				-- DELETE link local address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

				-- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
				DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

				-- DELETE multicast and reserved address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

				-- Delete networks that are too small to be in the global routing table
				DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
				DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

				-- Delete any non-public or reserved ASNs
				-- NOTE(review): these bounds mirror VALID_ASN_RANGES at the
				-- top of the file - keep both in sync
				DELETE FROM announcements WHERE NOT (
					(autnum >= 1 AND autnum <= 23455)
					OR
					(autnum >= 23457 AND autnum <= 64495)
					OR
					(autnum >= 131072 AND autnum <= 4199999999)
				);

				-- Delete everything that we have not seen for 14 days
				DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
			""")
1129 def _handle_update_announcements_from_bird(self, server):
1130 # Pre-compile the regular expression for faster searching
35e5f041 1131 route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
22d8d199
MT
1132
1133 log.info("Requesting routing table from Bird (%s)" % server)
1134
35e5f041
MT
1135 aggregated_networks = []
1136
22d8d199
MT
1137 # Send command to list all routes
1138 for line in self._bird_cmd(server, "show route"):
1139 m = route.match(line)
1140 if not m:
b767db8d
MT
1141 # Skip empty lines
1142 if not line:
1143 pass
1144
45fa97cd 1145 # Ignore any header lines with the name of the routing table
b767db8d
MT
1146 elif line.startswith(b"Table"):
1147 pass
45fa97cd
MT
1148
1149 # Log anything else
b767db8d
MT
1150 else:
1151 log.debug("Could not parse line: %s" % line.decode())
1152
22d8d199
MT
1153 continue
1154
1155 # Fetch the extracted network and ASN
1156 network, autnum = m.groups()
1157
35e5f041
MT
1158 # Decode into strings
1159 if network:
1160 network = network.decode()
1161 if autnum:
1162 autnum = autnum.decode()
1163
1164 # Collect all aggregated networks
1165 if not autnum:
1166 log.debug("%s is an aggregated network" % network)
1167 aggregated_networks.append(network)
1168 continue
1169
22d8d199
MT
1170 # Insert it into the database
1171 self.db.execute("INSERT INTO announcements(network, autnum) \
1172 VALUES(%s, %s) ON CONFLICT (network) DO \
1173 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
35e5f041 1174 network, autnum,
22d8d199
MT
1175 )
1176
35e5f041
MT
1177 # Process any aggregated networks
1178 for network in aggregated_networks:
1179 log.debug("Processing aggregated network %s" % network)
1180
1181 # Run "show route all" for each network
1182 for line in self._bird_cmd(server, "show route %s all" % network):
1183 # Try finding the path
1184 m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1185 if m:
1186 # Select the last AS number in the path
1187 autnum = m.group(1).decode()
1188
1189 # Insert it into the database
1190 self.db.execute("INSERT INTO announcements(network, autnum) \
1191 VALUES(%s, %s) ON CONFLICT (network) DO \
1192 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1193 network, autnum,
1194 )
1195
1196 # We don't need to process any more
1197 break
1198
22d8d199
MT
1199 def _bird_cmd(self, socket_path, command):
1200 # Connect to the socket
1201 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1202 s.connect(socket_path)
cedee656 1203
22d8d199
MT
1204 # Allocate some buffer
1205 buffer = b""
83d61c46 1206
bbcefb38
MT
1207 log.debug("Sending Bird command: %s" % command)
1208
22d8d199
MT
1209 # Send the command
1210 s.send(b"%s\n" % command.encode())
209c04b6 1211
22d8d199
MT
1212 while True:
1213 # Fill up the buffer
1214 buffer += s.recv(4096)
209c04b6 1215
22d8d199
MT
1216 while True:
1217 # Search for the next newline
1218 pos = buffer.find(b"\n")
209c04b6 1219
22d8d199
MT
1220 # If we cannot find one, we go back and read more data
1221 if pos <= 0:
1222 break
209c04b6 1223
22d8d199
MT
1224 # Cut after the newline character
1225 pos += 1
b89cee80 1226
22d8d199
MT
1227 # Split the line we want and keep the rest in buffer
1228 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1229
bbcefb38
MT
1230 # Try parsing any status lines
1231 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1232 code, delim, line = int(line[:4]), line[4], line[5:]
1233
1234 log.debug("Received response code %s from bird" % code)
1235
1236 # End of output
1237 if code == 0:
1238 return
1239
1240 # Ignore hello line
1241 elif code == 1:
1242 continue
83d61c46 1243
22d8d199
MT
1244 # Otherwise return the line
1245 yield line
83d61c46 1246
	def handle_update_geofeeds(self, ns):
		"""
			Handler for "update-geofeeds": synchronises the geofeeds table
			with the URLs referenced in network_geofeeds and refreshes all
			feeds that are new or older than one week, fetching up to ten
			of them in parallel.
		"""
		# Sync geofeeds
		with self.db.transaction():
			# Delete all geofeeds which are no longer linked
			self.db.execute("""
				DELETE FROM
					geofeeds
				WHERE
					NOT EXISTS (
						SELECT
							1
						FROM
							network_geofeeds
						WHERE
							geofeeds.url = network_geofeeds.url
					)""",
			)

			# Copy all geofeeds
			self.db.execute("""
				INSERT INTO
					geofeeds(
						url
					)
				SELECT
					url
				FROM
					network_geofeeds
				ON CONFLICT (url)
				DO NOTHING
				""",
			)

		# Fetch all Geofeeds that require an update
		geofeeds = self.db.query("""
			SELECT
				id,
				url
			FROM
				geofeeds
			WHERE
				updated_at IS NULL
			OR
				updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
			ORDER BY
				id
			""")

		# Each worker runs _fetch_geofeed() in its own transaction
		with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
			results = executor.map(self._fetch_geofeed, geofeeds)

			# Fetch all results to raise any exceptions
			# (executor.map is lazy; without this loop, worker exceptions
			# would be silently dropped)
			for result in results:
				pass
	def _fetch_geofeed(self, geofeed):
		"""
			Downloads a single RFC 8805 Geofeed (CSV) and replaces its
			networks in geofeed_networks.

			On success, updated_at is bumped and status/error cleared; on
			HTTP or connection errors, status/error are recorded instead.
			Runs in its own transaction, so a failed fetch rolls back the
			DELETE of the previous data.
		"""
		log.debug("Fetching Geofeed %s" % geofeed.url)

		with self.db.transaction():
			# Open the URL
			try:
				req = urllib.request.Request(geofeed.url, headers={
					"User-Agent" : "location/%s" % location.__version__,

					# We expect some plain text file in CSV format
					"Accept" : "text/csv, text/plain",
				})

				# XXX set proxy

				# Send the request
				with urllib.request.urlopen(req, timeout=10) as f:
					# Remove any previous data
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

					# Line counter for log messages
					lineno = 0

					# Read the output line by line
					for line in f:
						lineno += 1

						try:
							line = line.decode()

						# Ignore any lines we cannot decode
						except UnicodeDecodeError:
							log.debug("Could not decode line %s in %s" \
								% (lineno, geofeed.url))
							continue

						# Strip any newline
						line = line.rstrip()

						# Skip empty lines
						if not line:
							continue

						# Try to parse the line
						# NOTE(review): str.split() never raises ValueError,
						# so this except branch is dead code; maxsplit=5 only
						# caps the number of fields at six
						try:
							fields = line.split(",", 5)
						except ValueError:
							log.debug("Could not parse line: %s" % line)
							continue

						# Check if we have enough fields
						if len(fields) < 4:
							log.debug("Not enough fields in line: %s" % line)
							continue

						# Fetch all fields (any trailing RFC 8805 fields are ignored)
						network, country, region, city, = fields[:4]

						# Try to parse the network
						try:
							network = ipaddress.ip_network(network, strict=False)
						except ValueError:
							log.debug("Could not parse network: %s" % network)
							continue

						# Strip any excess whitespace from country codes
						country = country.strip()

						# Make the country code uppercase
						country = country.upper()

						# Check the country code
						if not country:
							log.debug("Empty country code in Geofeed %s line %s" \
								% (geofeed.url, lineno))
							continue

						elif not location.country_code_is_valid(country):
							log.debug("Invalid country code in Geofeed %s:%s: %s" \
								% (geofeed.url, lineno, country))
							continue

						# Write this into the database
						self.db.execute("""
							INSERT INTO
								geofeed_networks (
									geofeed_id,
									network,
									country,
									region,
									city
								)
							VALUES (%s, %s, %s, %s, %s)""",
							geofeed.id,
							"%s" % network,
							country,
							region,
							city,
						)

			# Catch any HTTP errors
			except urllib.request.HTTPError as e:
				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", e.code, "%s" % e, geofeed.id)

			# Catch any other errors and connection timeouts
			# (599 is used as a synthetic "network error" status code)
			except (urllib.request.URLError, TimeoutError) as e:
				log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))

				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", 599, "%s" % e, geofeed.id)

			# Mark the geofeed as updated
			else:
				self.db.execute("""
					UPDATE
						geofeeds
					SET
						updated_at = CURRENT_TIMESTAMP,
						status = NULL,
						error = NULL
					WHERE
						id = %s""",
					geofeed.id,
				)
	def handle_update_overrides(self, ns):
		"""
			Handler for "update-overrides": refreshes the manual override
			tables from the given RPSL-style files, plus the automatic
			overrides derived from the AWS IP feed and the Spamhaus DROP
			lists.
		"""
		with self.db.transaction():
			# Only drop manually created overrides, as we can be reasonably sure to have them,
			# and preserve the rest. If appropriate, it is deleted by correspondent functions.
			self.db.execute("""
				DELETE FROM autnum_overrides WHERE source = 'manual';
				DELETE FROM network_overrides WHERE source = 'manual';
			""")

			# Update overrides for various cloud providers big enough to publish their own IP
			# network allocation lists in a machine-readable format...
			self._update_overrides_for_aws()

			# Update overrides for Spamhaus DROP feeds...
			self._update_overrides_for_spamhaus_drop()

			# Parse the override files given on the command line
			for file in ns.files:
				log.info("Reading %s..." % file)

				with open(file, "rb") as f:
					for type, block in location.importer.read_blocks(f):
						# "net" blocks become network overrides
						if type == "net":
							network = block.get("net")
							# Try to parse and normalise the network
							try:
								network = ipaddress.ip_network(network, strict=False)
							except ValueError as e:
								log.warning("Invalid IP network: %s: %s" % (network, e))
								continue

							# Prevent that we overwrite all networks
							if network.prefixlen == 0:
								log.warning("Skipping %s: You cannot overwrite default" % network)
								continue

							self.db.execute("""
								INSERT INTO network_overrides(
									network,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES (%s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT (network) DO NOTHING""",
								"%s" % network,
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						# "aut-num" blocks become AS overrides
						elif type == "aut-num":
							autnum = block.get("aut-num")

							# Check if AS number begins with "AS"
							if not autnum.startswith("AS"):
								log.warning("Invalid AS number: %s" % autnum)
								continue

							# Strip "AS"
							autnum = autnum[2:]

							self.db.execute("""
								INSERT INTO autnum_overrides(
									number,
									name,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT DO NOTHING""",
								autnum,
								block.get("name"),
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						# Anything else is unsupported
						else:
							log.warning("Unsupported type: %s" % type)
1519 def _update_overrides_for_aws(self):
1520 # Download Amazon AWS IP allocation file to create overrides...
1521 downloader = location.importer.Downloader()
1522
1523 try:
85e44d9a
MT
1524 # Fetch IP ranges
1525 f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
1526
1527 # Parse downloaded file
1528 aws_ip_dump = json.load(f)
dcef2ba4
PM
1529 except Exception as e:
1530 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1531 return
1532
68fdeb3b
PM
1533 # At this point, we can assume the downloaded file to be valid
1534 self.db.execute("""
1535 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1536 """)
1537
dcef2ba4
PM
1538 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1539 # there seems to be no machine-readable version available of this other than
1540 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1541 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1542 # was helpful here as well.
1543 aws_region_country_map = {
1544 "af-south-1": "ZA",
1545 "ap-east-1": "HK",
1546 "ap-south-1": "IN",
1547 "ap-south-2": "IN",
1548 "ap-northeast-3": "JP",
1549 "ap-northeast-2": "KR",
1550 "ap-southeast-1": "SG",
1551 "ap-southeast-2": "AU",
1552 "ap-southeast-3": "MY",
1553 "ap-southeast-4": "AU",
c9486b13 1554 "ap-southeast-6": "AP", # XXX: Precise location not documented anywhere
dcef2ba4
PM
1555 "ap-northeast-1": "JP",
1556 "ca-central-1": "CA",
c9486b13 1557 "ca-west-1": "CA",
dcef2ba4
PM
1558 "eu-central-1": "DE",
1559 "eu-central-2": "CH",
1560 "eu-west-1": "IE",
1561 "eu-west-2": "GB",
1562 "eu-south-1": "IT",
1563 "eu-south-2": "ES",
1564 "eu-west-3": "FR",
1565 "eu-north-1": "SE",
ad017947 1566 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
dcef2ba4
PM
1567 "me-central-1": "AE",
1568 "me-south-1": "BH",
1569 "sa-east-1": "BR"
1570 }
1571
1572 # Fetch all valid country codes to check parsed networks aganist...
1573 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1574 validcountries = []
1575
1576 for row in rows:
1577 validcountries.append(row.country_code)
1578
1579 with self.db.transaction():
1580 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1581 try:
1582 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1583 except ValueError:
1584 log.warning("Unable to parse line: %s" % snetwork)
1585 continue
1586
1587 # Sanitize parsed networks...
1588 if not self._check_parsed_network(network):
1589 continue
1590
1591 # Determine region of this network...
1592 region = snetwork["region"]
1593 cc = None
1594 is_anycast = False
1595
1596 # Any region name starting with "us-" will get "US" country code assigned straight away...
1597 if region.startswith("us-"):
1598 cc = "US"
1599 elif region.startswith("cn-"):
1600 # ... same goes for China ...
1601 cc = "CN"
1602 elif region == "GLOBAL":
1603 # ... funny region name for anycast-like networks ...
1604 is_anycast = True
1605 elif region in aws_region_country_map:
1606 # ... assign looked up country code otherwise ...
1607 cc = aws_region_country_map[region]
1608 else:
1609 # ... and bail out if we are missing something here
1610 log.warning("Unable to determine country code for line: %s" % snetwork)
1611 continue
1612
1613 # Skip networks with unknown country codes
1614 if not is_anycast and validcountries and cc not in validcountries:
1615 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1616 (cc, network))
1617 return
1618
1619 # Conduct SQL statement...
1620 self.db.execute("""
1621 INSERT INTO network_overrides(
1622 network,
1623 country,
1624 source,
1625 is_anonymous_proxy,
1626 is_satellite_provider,
1627 is_anycast
1628 ) VALUES (%s, %s, %s, %s, %s, %s)
1629 ON CONFLICT (network) DO NOTHING""",
1630 "%s" % network,
1631 cc,
1632 "Amazon AWS IP feed",
1633 None,
1634 None,
1635 is_anycast,
1636 )
1637
1638
69b3d894
PM
1639 def _update_overrides_for_spamhaus_drop(self):
1640 downloader = location.importer.Downloader()
1641
5acac2a4
PM
1642 ip_lists = [
1643 ("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
1644 ("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
1645 ("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
69b3d894
PM
1646 ]
1647
5acac2a4
PM
1648 asn_lists = [
1649 ("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.txt")
69b3d894
PM
1650 ]
1651
5acac2a4
PM
1652 for name, url in ip_lists:
1653 # Fetch IP list from given URL
85e44d9a
MT
1654 f = downloader.retrieve(url)
1655
1656 # Split into lines
1657 fcontent = f.readlines()
69b3d894 1658
69b3d894 1659 with self.db.transaction():
5acac2a4
PM
1660 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1661 # downloads.
1662 if len(fcontent) > 10:
b56bf4bf 1663 self.db.execute("DELETE FROM network_overrides WHERE source = %s", name)
5acac2a4
PM
1664 else:
1665 log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
1666 continue
1667
1668 # Iterate through every line, filter comments and add remaining networks to
1669 # the override table in case they are valid...
69b3d894 1670 for sline in fcontent:
69b3d894
PM
1671 # The response is assumed to be encoded in UTF-8...
1672 sline = sline.decode("utf-8")
1673
1674 # Comments start with a semicolon...
1675 if sline.startswith(";"):
1676 continue
1677
1678 # Extract network and ignore anything afterwards...
1679 try:
1680 network = ipaddress.ip_network(sline.split()[0], strict=False)
1681 except ValueError:
1682 log.error("Unable to parse line: %s" % sline)
1683 continue
1684
1685 # Sanitize parsed networks...
1686 if not self._check_parsed_network(network):
5acac2a4
PM
1687 log.warning("Skipping bogus network found in %s (%s): %s" % \
1688 (name, url, network))
69b3d894
PM
1689 continue
1690
1691 # Conduct SQL statement...
1692 self.db.execute("""
1693 INSERT INTO network_overrides(
1694 network,
1695 source,
1696 is_drop
1697 ) VALUES (%s, %s, %s)
148f68bf 1698 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
69b3d894 1699 "%s" % network,
5acac2a4 1700 name,
69b3d894
PM
1701 True
1702 )
1703
5acac2a4 1704 for name, url in asn_lists:
85e44d9a
MT
1705 # Fetch URL
1706 f = downloader.retrieve(url)
69b3d894 1707
5acac2a4
PM
1708 # Split into lines
1709 fcontent = f.readlines()
1710
69b3d894 1711 with self.db.transaction():
5acac2a4
PM
1712 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1713 # downloads.
1714 if len(fcontent) > 10:
b56bf4bf 1715 self.db.execute("DELETE FROM autnum_overrides WHERE source = %s", name)
5acac2a4
PM
1716 else:
1717 log.error("%s (%s) returned likely bogus file, ignored" % (name, url))
1718 continue
1719
1720 # Iterate through every line, filter comments and add remaining ASNs to
1721 # the override table in case they are valid...
00beb796 1722 for sline in f.readlines():
69b3d894
PM
1723 # The response is assumed to be encoded in UTF-8...
1724 sline = sline.decode("utf-8")
1725
1726 # Comments start with a semicolon...
1727 if sline.startswith(";"):
1728 continue
1729
1730 # Throw away anything after the first space...
1731 sline = sline.split()[0]
1732
1733 # ... strip the "AS" prefix from it ...
1734 sline = sline.strip("AS")
1735
1736 # ... and convert it into an integer. Voila.
1737 asn = int(sline)
1738
1739 # Filter invalid ASNs...
1740 if not self._check_parsed_asn(asn):
5acac2a4
PM
1741 log.warning("Skipping bogus ASN found in %s (%s): %s" % \
1742 (name, url, asn))
69b3d894
PM
1743 continue
1744
1745 # Conduct SQL statement...
1746 self.db.execute("""
1747 INSERT INTO autnum_overrides(
1748 number,
1749 source,
1750 is_drop
1751 ) VALUES (%s, %s, %s)
148f68bf 1752 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
69b3d894 1753 "%s" % asn,
5acac2a4 1754 name,
69b3d894
PM
1755 True
1756 )
1757
28d29b7c
MT
1758 @staticmethod
1759 def _parse_bool(block, key):
1760 val = block.get(key)
1761
1762 # There is no point to proceed when we got None
1763 if val is None:
1764 return
1765
1766 # Convert to lowercase
1767 val = val.lower()
1768
1769 # True
1770 if val in ("yes", "1"):
1771 return True
1772
1773 # False
1774 if val in ("no", "0"):
1775 return False
1776
1777 # Default to None
1778 return None
1779
3ce28dea
MT
1780 @property
1781 def countries(self):
1782 # Fetch all valid country codes to check parsed networks aganist
1783 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1784
1785 # Return all countries
1786 return [row.country_code for row in rows]
1787
8084b33a
MT
1788 def handle_import_countries(self, ns):
1789 with self.db.transaction():
1790 # Drop all data that we have
1791 self.db.execute("TRUNCATE TABLE countries")
1792
1793 for file in ns.file:
1794 for line in file:
1795 line = line.rstrip()
1796
1797 # Ignore any comments
1798 if line.startswith("#"):
1799 continue
1800
1801 try:
1802 country_code, continent_code, name = line.split(maxsplit=2)
1803 except:
1804 log.warning("Could not parse line: %s" % line)
1805 continue
1806
1807 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1808 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1809
6ffd06b5
MT
1810
def split_line(line):
	"""
		Splits a "key: value" RPSL line at the first colon and returns the
		(key, value) pair with surrounding whitespace stripped. A line
		without a colon yields the whole line as key and "" as value.
	"""
	key, _, value = line.partition(":")

	return key.strip(), value.strip()
78ff0cf2
MT
1819
def main():
	"""Entry point: instantiate the CLI and hand over control."""
	cli = CLI()
	cli.run()

# This file is a script; run unconditionally at import
main()