]> git.ipfire.org Git - people/ms/libloc.git/blame - src/scripts/location-importer.in
importer: Make the export 200x faster
[people/ms/libloc.git] / src / scripts / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
5c7cfeb2 6# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
98d9079b 21import asyncio
f4f3d8ad 22import csv
81b28603 23import functools
a6fedd9f 24import http.client
fa5dccf1 25import io
6ffd06b5 26import ipaddress
dcef2ba4 27import json
78ff0cf2 28import logging
6ffd06b5
MT
29import math
30import re
22d8d199 31import socket
78ff0cf2 32import sys
3ce28dea 33import urllib.error
78ff0cf2
MT
34
35# Load our location module
36import location
29c6fa22 37import location.database
335946c0 38from location.downloader import Downloader
78ff0cf2
MT
39from location.i18n import _
40
41# Initialise logging
42log = logging.getLogger("location.importer")
43log.propagate = 1
44
43fe570c
PM
45# Define constants
46VALID_ASN_RANGES = (
47 (1, 23455),
48 (23457, 64495),
49 (131072, 4199999999),
50)
51
afc966db
MT
52TRANSLATED_COUNTRIES = {
53 # When people say UK, they mean GB
54 "UK" : "GB",
55}
56
30d32299
MT
57IGNORED_COUNTRIES = set((
58 # Formerly Yugoslavia
59 "YU",
60
61 # Some people use ZZ to say "no country" or to hide the country
62 "ZZ",
63))
64
18a72eac
MT
65# Configure the CSV parser for ARIN
66csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
43fe570c 67
78ff0cf2
MT
68class CLI(object):
69 def parse_cli(self):
70 parser = argparse.ArgumentParser(
71 description=_("Location Importer Command Line Interface"),
72 )
6ffd06b5 73 subparsers = parser.add_subparsers()
78ff0cf2
MT
74
75 # Global configuration flags
76 parser.add_argument("--debug", action="store_true",
77 help=_("Enable debug output"))
bc1f5f53
MT
78 parser.add_argument("--quiet", action="store_true",
79 help=_("Enable quiet mode"))
78ff0cf2
MT
80
81 # version
82 parser.add_argument("--version", action="version",
83 version="%(prog)s @VERSION@")
84
29c6fa22
MT
85 # Database
86 parser.add_argument("--database-host", required=True,
87 help=_("Database Hostname"), metavar=_("HOST"))
88 parser.add_argument("--database-name", required=True,
89 help=_("Database Name"), metavar=_("NAME"))
90 parser.add_argument("--database-username", required=True,
91 help=_("Database Username"), metavar=_("USERNAME"))
92 parser.add_argument("--database-password", required=True,
93 help=_("Database Password"), metavar=_("PASSWORD"))
94
0983f3dd
MT
95 # Write Database
96 write = subparsers.add_parser("write", help=_("Write database to file"))
97 write.set_defaults(func=self.handle_write)
98 write.add_argument("file", nargs=1, help=_("Database File"))
99 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
1164d876 100 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
0983f3dd
MT
101 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
102 write.add_argument("--description", nargs="?", help=_("Sets a description"))
103 write.add_argument("--license", nargs="?", help=_("Sets the license"))
b904896a 104 write.add_argument("--version", type=int, help=_("Database Format Version"))
0983f3dd 105
6ffd06b5
MT
106 # Update WHOIS
107 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
d77d05d1
MT
108 update_whois.add_argument("sources", nargs="*",
109 help=_("Only update these sources"))
6ffd06b5
MT
110 update_whois.set_defaults(func=self.handle_update_whois)
111
83d61c46
MT
112 # Update announcements
113 update_announcements = subparsers.add_parser("update-announcements",
114 help=_("Update BGP Annoucements"))
115 update_announcements.set_defaults(func=self.handle_update_announcements)
116 update_announcements.add_argument("server", nargs=1,
117 help=_("Route Server to connect to"), metavar=_("SERVER"))
118
0fcdd689
MT
119 # Update geofeeds
120 update_geofeeds = subparsers.add_parser("update-geofeeds",
121 help=_("Update Geofeeds"))
122 update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
123
93aad7f7
MT
124 # Update feeds
125 update_feeds = subparsers.add_parser("update-feeds",
126 help=_("Update Feeds"))
f7f8e714
MT
127 update_feeds.add_argument("feeds", nargs="*",
128 help=_("Only update these feeds"))
93aad7f7
MT
129 update_feeds.set_defaults(func=self.handle_update_feeds)
130
d7fc3057
MT
131 # Update overrides
132 update_overrides = subparsers.add_parser("update-overrides",
133 help=_("Update overrides"),
134 )
135 update_overrides.add_argument(
136 "files", nargs="+", help=_("Files to import"),
137 )
138 update_overrides.set_defaults(func=self.handle_update_overrides)
139
8084b33a
MT
140 # Import countries
141 import_countries = subparsers.add_parser("import-countries",
142 help=_("Import countries"),
143 )
144 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
145 help=_("File to import"))
146 import_countries.set_defaults(func=self.handle_import_countries)
147
78ff0cf2
MT
148 args = parser.parse_args()
149
bc1f5f53 150 # Configure logging
78ff0cf2 151 if args.debug:
f9de5e61 152 location.logger.set_level(logging.DEBUG)
bc1f5f53
MT
153 elif args.quiet:
154 location.logger.set_level(logging.WARNING)
78ff0cf2 155
6ffd06b5
MT
156 # Print usage if no action was given
157 if not "func" in args:
158 parser.print_usage()
159 sys.exit(2)
160
78ff0cf2
MT
161 return args
162
98d9079b 163 async def run(self):
78ff0cf2
MT
164 # Parse command line arguments
165 args = self.parse_cli()
166
335946c0
MT
167 # Initialize the downloader
168 self.downloader = Downloader()
169
29c6fa22 170 # Initialise database
6ffd06b5 171 self.db = self._setup_database(args)
29c6fa22 172
78ff0cf2 173 # Call function
98d9079b 174 ret = await args.func(args)
78ff0cf2
MT
175
176 # Return with exit code
177 if ret:
178 sys.exit(ret)
179
180 # Otherwise just exit
181 sys.exit(0)
182
29c6fa22
MT
	def _setup_database(self, ns):
		"""
		Initialise the database

		Connects to PostgreSQL using the credentials from the parsed
		command line arguments (ns) and creates or migrates the whole
		schema inside a single transaction.

		The DDL below is idempotent (IF NOT EXISTS / ADD COLUMN IF NOT
		EXISTS / DROP ... IF EXISTS), so it is safe to run on every start
		and doubles as the migration mechanism.

		Returns the open database connection.
		"""
		# Connect to database
		db = location.database.Connection(
			host=ns.database_host, database=ns.database_name,
			user=ns.database_username, password=ns.database_password,
		)

		with db.transaction():
			db.execute("""
				-- announcements
				CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
					first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
					last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
				CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
				CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
				CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING SPGIST(network inet_ops);

				-- autnums
				CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
				ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

				-- countries
				CREATE TABLE IF NOT EXISTS countries(
					country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

				-- networks
				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
				CREATE INDEX IF NOT EXISTS networks_search ON networks USING SPGIST(network inet_ops);

				-- geofeeds
				CREATE TABLE IF NOT EXISTS geofeeds(
					id serial primary key,
					url text,
					status integer default null,
					updated_at timestamp without time zone default null
				);
				ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
				CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
					ON geofeeds(url);
				CREATE TABLE IF NOT EXISTS geofeed_networks(
					geofeed_id integer references geofeeds(id) on delete cascade,
					network inet,
					country text,
					region text,
					city text
				);
				CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
					ON geofeed_networks(geofeed_id);
				CREATE INDEX IF NOT EXISTS geofeed_networks_search
					ON geofeed_networks USING SPGIST(network inet_ops);
				CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
				ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL;
				CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique2
					ON network_geofeeds(network, url);
				CREATE INDEX IF NOT EXISTS network_geofeeds_search
					ON network_geofeeds USING SPGIST(network inet_ops);
				CREATE INDEX IF NOT EXISTS network_geofeeds_url
					ON network_geofeeds(url);
				DROP INDEX IF EXISTS network_geofeeds_unique;

				-- feeds
				CREATE TABLE IF NOT EXISTS autnum_feeds(
					number bigint NOT NULL,
					source text NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
					ON autnum_feeds(number, source);

				CREATE TABLE IF NOT EXISTS network_feeds(
					network inet NOT NULL,
					source text NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
					ON network_feeds(network, source);
				CREATE INDEX IF NOT EXISTS network_feeds_search
					ON network_feeds USING SPGIST(network inet_ops);

				-- overrides
				CREATE TABLE IF NOT EXISTS autnum_overrides(
					number bigint NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
					ON autnum_overrides(number);
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
				ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;

				CREATE TABLE IF NOT EXISTS network_overrides(
					network inet NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
					ON network_overrides(network);
				CREATE INDEX IF NOT EXISTS network_overrides_search
					ON network_overrides USING SPGIST(network inet_ops);
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
				ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;

				DROP TABLE IF EXISTS geofeed_overrides;
			""")

		return db
312
e61a9352
MT
313 def fetch_countries(self):
314 """
315 Returns a list of all countries on the list
316 """
317 # Fetch all valid country codes to check parsed networks aganist...
318 countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
319
ae20698a 320 return set((country.country_code for country in countries))
e61a9352 321
	async def handle_write(self, ns):
		"""
		Compiles a database in libloc format out of what is in the database

		The export builds one temporary table "n" that contains every
		network we know about, then enriches it in-place in several passes
		(announcements, RIR data, geofeeds, feeds, overrides). The order of
		these passes matters: later passes take precedence via COALESCE.
		Finally the table is streamed into the libloc writer together with
		the AS and country lists.
		"""
		# Allocate a writer
		writer = location.Writer(ns.signing_key, ns.backup_signing_key)

		# Set all metadata
		if ns.vendor:
			writer.vendor = ns.vendor

		if ns.description:
			writer.description = ns.description

		if ns.license:
			writer.license = ns.license

		# Analyze everything for the query planner hopefully making better decisions
		self.db.execute("ANALYZE")

		# Add all Autonomous Systems
		log.info("Writing Autonomous Systems...")

		# Select all ASes with a name (overridden names win)
		rows = self.db.query("""
			SELECT
				autnums.number AS number,
				COALESCE(
					overrides.name,
					autnums.name
				) AS name
			FROM
				autnums
			LEFT JOIN
				autnum_overrides overrides ON autnums.number = overrides.number
			ORDER BY
				autnums.number
			""")

		for row in rows:
			# Skip AS without names
			if not row.name:
				continue

			a = writer.add_as(row.number)
			a.name = row.name

		# Add all networks
		log.info("Writing networks...")

		# Create a new temporary table where we collect
		# the networks that we are interested in
		self.db.execute("""
			CREATE TEMPORARY TABLE
				n
			(
				network inet NOT NULL,
				autnum integer,
				country text,
				is_anonymous_proxy boolean,
				is_satellite_provider boolean,
				is_anycast boolean,
				is_drop boolean
			)
			WITH (FILLFACTOR = 50)
		""")

		# Add all known networks (the UNION also deduplicates)
		self.db.execute("""
			INSERT INTO
				n
			(
				network
			)

			SELECT
				network
			FROM
				announcements

			UNION

			SELECT
				network
			FROM
				networks

			UNION

			SELECT
				network
			FROM
				network_feeds

			UNION

			SELECT
				network
			FROM
				network_overrides

			UNION

			SELECT
				network
			FROM
				geofeed_networks
		""")

		# Create an index to search through networks faster
		self.db.execute("""
			CREATE INDEX
				n_search
			ON
				n
			USING
				SPGIST(network)
		""")

		# Analyze n
		self.db.execute("ANALYZE n")

		# Apply the AS number to all networks
		self.db.execute("""
			-- Join all networks together with their most specific announcements
			WITH announcements AS (
				SELECT
					n.network,
					announcements.autnum,

					-- Sort all merges and number them so
					-- that we can later select the best one
					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(announcements.network) DESC
					) AS row
				FROM
					n
				JOIN
					announcements
				ON
					announcements.network >>= n.network
			)

			-- Store the result
			UPDATE
				n
			SET
				autnum = announcements.autnum
			FROM
				announcements
			WHERE
				announcements.network = n.network
			AND
				announcements.row = 1
			""",
		)

		# Apply country information (most specific covering RIR network wins)
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					networks.country,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(networks.network) DESC
					) AS row
				FROM
					n
				JOIN
					networks
				ON
					networks.network >>= n.network
			)

			UPDATE
				n
			SET
				country = networks.country
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
			""",
		)

		# Add all country information from Geofeeds
		# (only networks that actually fall under a published geofeed entry)
		self.db.execute("""
			WITH geofeeds AS (
				SELECT
					DISTINCT ON (geofeed_networks.network)
					geofeed_networks.network,
					geofeed_networks.country
				FROM
					geofeeds
				JOIN
					network_geofeeds networks
				ON
					geofeeds.url = networks.url
				JOIN
					geofeed_networks
				ON
					geofeeds.id = geofeed_networks.geofeed_id
				AND
					networks.network >>= geofeed_networks.network
			),

			networks AS (
				SELECT
					n.network,
					geofeeds.country,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(geofeeds.network) DESC
					) AS row
				FROM
					n
				JOIN
					geofeeds
				ON
					geofeeds.network >>= n.network
			)

			UPDATE
				n
			SET
				country = networks.country
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
			""",
		)

		# Apply country and flags from feeds
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					network_feeds.country,

					-- Flags
					network_feeds.is_anonymous_proxy,
					network_feeds.is_satellite_provider,
					network_feeds.is_anycast,
					network_feeds.is_drop,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(network_feeds.network) DESC
					) AS row
				FROM
					n
				JOIN
					network_feeds
				ON
					network_feeds.network >>= n.network
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
			""",
		)

		# Apply country and flags from AS feeds (matched by the AS number
		# assigned to each network in the announcements pass above)
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					autnum_feeds.country,

					-- Flags
					autnum_feeds.is_anonymous_proxy,
					autnum_feeds.is_satellite_provider,
					autnum_feeds.is_anycast,
					autnum_feeds.is_drop
				FROM
					n
				JOIN
					autnum_feeds
				ON
					autnum_feeds.number = n.autnum
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
		""")

		# Apply network overrides (take precedence over all feeds)
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					network_overrides.country,

					-- Flags
					network_overrides.is_anonymous_proxy,
					network_overrides.is_satellite_provider,
					network_overrides.is_anycast,
					network_overrides.is_drop,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(network_overrides.network) DESC
					) AS row
				FROM
					n
				JOIN
					network_overrides
				ON
					network_overrides.network >>= n.network
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
		""")

		# Apply AS overrides (run last so they win for their networks)
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					autnum_overrides.country,

					-- Flags
					autnum_overrides.is_anonymous_proxy,
					autnum_overrides.is_satellite_provider,
					autnum_overrides.is_anycast,
					autnum_overrides.is_drop
				FROM
					n
				JOIN
					autnum_overrides
				ON
					autnum_overrides.number = n.autnum
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
		""")

		# Here we could remove some networks that we no longer need, but since we
		# already have implemented our deduplication/merge algorithm this would not
		# be necessary.

		# Export the entire temporary table
		rows = self.db.query("""
			SELECT
				*
			FROM
				n
			ORDER BY
				network
		""")

		for row in rows:
			network = writer.add_network("%s" % row.network)

			# Save country
			if row.country:
				network.country_code = row.country

			# Save ASN
			if row.autnum:
				network.asn = row.autnum

			# Set flags
			if row.is_anonymous_proxy:
				network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

			if row.is_satellite_provider:
				network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

			if row.is_anycast:
				network.set_flag(location.NETWORK_FLAG_ANYCAST)

			if row.is_drop:
				network.set_flag(location.NETWORK_FLAG_DROP)

		# Add all countries
		log.info("Writing countries...")

		# Select all countries
		rows = self.db.query("""
			SELECT
				*
			FROM
				countries
			ORDER BY
				country_code
			""",
		)

		for row in rows:
			c = writer.add_country(row.country_code)
			c.continent_code = row.continent_code
			c.name = row.name

		# Write everything to file
		log.info("Writing database to file...")
		for file in ns.file:
			writer.write(file)
831
	async def handle_update_whois(self, ns):
		"""
		Updates the WHOIS data from all (or the selected) RIR sources.

		Each source is a name plus a tuple of (callback, url) feeds. A
		failing source is logged and skipped so the remaining sources are
		still processed; the exit code then becomes non-zero.
		"""
		# Did we run successfully?
		success = True

		sources = (
			# African Network Information Centre
			("AFRINIC", (
				(self._import_standard_format, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
			)),

			# Asia Pacific Network Information Centre
			("APNIC", (
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
			)),

			# American Registry for Internet Numbers
			("ARIN", (
				(self._import_extended_format, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
				(self._import_arin_as_names, "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
			)),

			# Japan Network Information Center
			("JPNIC", (
				(self._import_standard_format, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
			)),

			# Latin America and Caribbean Network Information Centre
			("LACNIC", (
				(self._import_standard_format, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
				(self._import_extended_format, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
			)),

			# Réseaux IP Européens
			("RIPE", (
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
			)),
		)

		# Fetch all valid country codes to check parsed networks against
		countries = self.fetch_countries()

		# Check if we have countries
		if not countries:
			log.error("Please import countries before importing any WHOIS data")
			return 1

		# Iterate over all potential sources
		for name, feeds in sources:
			# Skip anything that should not be updated
			if ns.sources and not name in ns.sources:
				continue

			try:
				await self._process_source(name, feeds, countries)

			# Log an error but continue if an exception occurs
			except Exception as e:
				log.error("Error processing source %s" % name, exc_info=True)
				success = False

		# Return a non-zero exit code for errors
		return 0 if success else 1
900
	async def _process_source(self, source, feeds, countries):
		"""
		This function processes one source

		All work happens inside a single transaction so that a failing
		feed leaves the previously imported data for this source intact.
		Parsed records are first collected into temporary tables
		(_autnums, _organizations, _rirdata) which are dropped on commit,
		and then merged into the persistent networks/autnums tables.
		"""
		# Wrap everything into one large transaction
		with self.db.transaction():
			# Remove all previously imported content
			self.db.execute("DELETE FROM autnums WHERE source = %s", source)
			self.db.execute("DELETE FROM networks WHERE source = %s", source)
			self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", source)

			# Create some temporary tables to store parsed data
			self.db.execute("""
				CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
					organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
				CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

				CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
					name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
					original_countries text[] NOT NULL, source text NOT NULL)
					ON COMMIT DROP;
				CREATE INDEX _rirdata_search ON _rirdata
					USING BTREE(family(network), masklen(network));
				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
			""")

			# Parse all feeds
			for callback, url, *args in feeds:
				# Retrieve the feed
				f = self.downloader.retrieve(url)

				# Call the callback (pipelined for fewer round-trips)
				with self.db.pipeline():
					await callback(source, countries, f, *args)

			# Process all parsed networks from every RIR we happen to have access to,
			# insert the largest network chunks into the networks table immediately...
			families = self.db.query("""
				SELECT DISTINCT
					family(network) AS family
				FROM
					_rirdata
				ORDER BY
					family(network)
				""",
			)

			for family in (row.family for row in families):
				# Fetch the smallest mask length in our data set
				smallest = self.db.get("""
					SELECT
						MIN(
							masklen(network)
						) AS prefix
					FROM
						_rirdata
					WHERE
						family(network) = %s
					""", family,
				)

				# Copy all networks with the smallest prefix (i.e. the largest chunks)
				self.db.execute("""
					INSERT INTO
						networks
					(
						network,
						country,
						original_countries,
						source
					)
					SELECT
						network,
						country,
						original_countries,
						source
					FROM
						_rirdata
					WHERE
						masklen(network) = %s
					AND
						family(network) = %s
					ON CONFLICT DO
						NOTHING""",
					smallest.prefix,
					family,
				)

				# ... determine any other prefixes for this network family, ...
				prefixes = self.db.query("""
					SELECT
						DISTINCT masklen(network) AS prefix
					FROM
						_rirdata
					WHERE
						family(network) = %s
					ORDER BY
						masklen(network) ASC
					OFFSET 1
					""", family,
				)

				# ... and insert networks with this prefix in case they provide additional
				# information (i. e. subnet of a larger chunk with a different country)
				for prefix in (row.prefix for row in prefixes):
					self.db.execute("""
						WITH candidates AS (
							SELECT
								_rirdata.network,
								_rirdata.country,
								_rirdata.original_countries,
								_rirdata.source
							FROM
								_rirdata
							WHERE
								family(_rirdata.network) = %s
							AND
								masklen(_rirdata.network) = %s
						),
						filtered AS (
							SELECT
								DISTINCT ON (c.network)
								c.network,
								c.country,
								c.original_countries,
								c.source,
								masklen(networks.network),
								networks.country AS parent_country
							FROM
								candidates c
							LEFT JOIN
								networks
							ON
								c.network << networks.network
							ORDER BY
								c.network,
								masklen(networks.network) DESC NULLS LAST
						)
						INSERT INTO
							networks(network, country, original_countries, source)
						SELECT
							network,
							country,
							original_countries,
							source
						FROM
							filtered
						WHERE
							parent_country IS NULL
						OR
							country <> parent_country
						ON CONFLICT DO NOTHING
						""", family, prefix,
					)

			# Merge the parsed AS numbers with their organisation names
			self.db.execute("""
				INSERT INTO
					autnums
				(
					number,
					name,
					source
				)
				SELECT
					_autnums.number,
					_organizations.name,
					_organizations.source
				FROM
					_autnums
				JOIN
					_organizations ON _autnums.organization = _organizations.handle
				ON CONFLICT
				(
					number
				)
				DO UPDATE
					SET name = excluded.name
				""",
			)
3ce28dea 1083
98d9079b 1084 async def _import_standard_format(self, source, countries, f, *args):
8c2dbbc2
MT
1085 """
1086 Imports a single standard format source feed
1087 """
1088 # Iterate over all blocks
8c3529c7 1089 for block in iterate_over_blocks(f):
8c2dbbc2
MT
1090 self._parse_block(block, source, countries)
1091
98d9079b 1092 async def _import_extended_format(self, source, countries, f, *args):
8c2dbbc2 1093 # Iterate over all lines
8c3529c7 1094 for line in iterate_over_lines(f):
c82e2eab 1095 self._parse_line(line, source, countries)
8c2dbbc2 1096
98d9079b 1097 async def _import_arin_as_names(self, source, countries, f, *args):
fa5dccf1
MT
1098 # Wrap the data to text
1099 f = io.TextIOWrapper(f)
1100
8c2dbbc2 1101 # Walk through the file
932a7552 1102 for line in csv.DictReader(f, dialect="arin"):
8c2dbbc2
MT
1103 # Fetch status
1104 status = line.get("Status")
1105
1106 # We are only interested in anything managed by ARIN
1107 if not status == "Full Registry Services":
1108 continue
1109
1110 # Fetch organization name
1111 name = line.get("Org Name")
1112
1113 # Extract ASNs
1114 first_asn = line.get("Start AS Number")
1115 last_asn = line.get("End AS Number")
1116
1117 # Cast to a number
1118 try:
1119 first_asn = int(first_asn)
1120 except TypeError as e:
1121 log.warning("Could not parse ASN '%s'" % first_asn)
1122 continue
1123
1124 try:
1125 last_asn = int(last_asn)
1126 except TypeError as e:
1127 log.warning("Could not parse ASN '%s'" % last_asn)
1128 continue
1129
1130 # Check if the range is valid
1131 if last_asn < first_asn:
1132 log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
1133
1134 # Insert everything into the database
1135 for asn in range(first_asn, last_asn + 1):
1136 if not self._check_parsed_asn(asn):
1137 log.warning("Skipping invalid ASN %s" % asn)
1138 continue
0365119d 1139
3ce28dea 1140 self.db.execute("""
fa9220b0
MT
1141 INSERT INTO
1142 autnums
1143 (
1144 number,
1145 name,
1146 source
1147 )
8c2dbbc2
MT
1148 VALUES
1149 (
1150 %s, %s, %s
1151 )
fa9220b0
MT
1152 ON CONFLICT
1153 (
1154 number
1155 )
8c2dbbc2
MT
1156 DO NOTHING
1157 """, asn, name, "ARIN",
fa9220b0 1158 )
429a43d1 1159
bd341642
PM
1160 def _check_parsed_network(self, network):
1161 """
1162 Assistive function to detect and subsequently sort out parsed
1163 networks from RIR data (both Whois and so-called "extended sources"),
1164 which are or have...
1165
1166 (a) not globally routable (RFC 1918 space, et al.)
1167 (b) covering a too large chunk of the IP address space (prefix length
1168 is < 7 for IPv4 networks, and < 10 for IPv6)
1169 (c) "0.0.0.0" or "::" as a network address
bd341642
PM
1170
1171 This unfortunately is necessary due to brain-dead clutter across
1172 various RIR databases, causing mismatches and eventually disruptions.
1173
1174 We will return False in case a network is not suitable for adding
1175 it to our database, and True otherwise.
1176 """
d9b0a6ea
MT
1177 # Check input
1178 if isinstance(network, ipaddress.IPv6Network):
1179 pass
1180 elif isinstance(network, ipaddress.IPv4Network):
1181 pass
1182 else:
1183 raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
bd341642 1184
cb45f0d5 1185 # Ignore anything that isn't globally routable
bd341642 1186 if not network.is_global:
2ba6ed07 1187 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
1188 return False
1189
cb45f0d5
MT
1190 # Ignore anything that is unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
1191 elif network.is_unspecified:
1192 log.debug("Skipping unspecified network: %s" % network)
1193 return False
1194
c940dcf1
MT
1195 # IPv6
1196 if network.version == 6:
bd341642 1197 if network.prefixlen < 10:
2ba6ed07 1198 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
1199 return False
1200
c940dcf1
MT
1201 # IPv4
1202 elif network.version == 4:
1203 if network.prefixlen < 7:
1204 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
1205 return False
1206
bd341642
PM
1207 # In case we have made it here, the network is considered to
1208 # be suitable for libloc consumption...
1209 return True
1210
43fe570c
PM
1211 def _check_parsed_asn(self, asn):
1212 """
1213 Assistive function to filter Autonomous System Numbers not being suitable
1214 for adding to our database. Returns False in such cases, and True otherwise.
1215 """
1216
1217 for start, end in VALID_ASN_RANGES:
1218 if start <= asn and end >= asn:
1219 return True
1220
1221 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
1222 return False
1223
c60e31ea
MT
1224 def _check_geofeed_url(self, url):
1225 """
1226 This function checks if a Geofeed URL is valid.
1227
1228 If so, it returns the normalized URL which should be stored instead of
1229 the original one.
1230 """
1231 # Parse the URL
1232 try:
1233 url = urllib.parse.urlparse(url)
1234 except ValueError as e:
1235 log.warning("Invalid URL %s: %s" % (url, e))
1236 return
1237
1238 # Make sure that this is a HTTPS URL
1239 if not url.scheme == "https":
1240 log.warning("Skipping Geofeed URL that is not using HTTPS: %s" \
1241 % url.geturl())
1242 return
1243
1244 # Normalize the URL and convert it back
1245 return url.geturl()
1246
d4697973 1247 def _parse_block(self, block, source_key, countries):
6ffd06b5
MT
1248 # Get first line to find out what type of block this is
1249 line = block[0]
1250
6ffd06b5 1251 # aut-num
429a43d1 1252 if line.startswith("aut-num:"):
28b08385 1253 return self._parse_autnum_block(block, source_key)
6ffd06b5 1254
aadac4c5
PM
1255 # inetnum
1256 if line.startswith("inet6num:") or line.startswith("inetnum:"):
d4697973 1257 return self._parse_inetnum_block(block, source_key, countries)
aadac4c5 1258
6ffd06b5
MT
1259 # organisation
1260 elif line.startswith("organisation:"):
28b08385 1261 return self._parse_org_block(block, source_key)
6ffd06b5 1262
28b08385 1263 def _parse_autnum_block(self, block, source_key):
6ffd06b5
MT
1264 autnum = {}
1265 for line in block:
1266 # Split line
1267 key, val = split_line(line)
1268
1269 if key == "aut-num":
1270 m = re.match(r"^(AS|as)(\d+)", val)
1271 if m:
1272 autnum["asn"] = m.group(2)
1273
0365119d 1274 elif key == "org":
e7d612e5 1275 autnum[key] = val.upper()
6ffd06b5 1276
426e0bee
PM
1277 elif key == "descr":
1278 # Save the first description line as well...
1279 if not key in autnum:
1280 autnum[key] = val
1281
6ffd06b5 1282 # Skip empty objects
426e0bee 1283 if not autnum or not "asn" in autnum:
6ffd06b5
MT
1284 return
1285
426e0bee
PM
1286 # Insert a dummy organisation handle into our temporary organisations
1287 # table in case the AS does not have an organisation handle set, but
1288 # has a description (a quirk often observed in APNIC area), so we can
1289 # later display at least some string for this AS.
1290 if not "org" in autnum:
1291 if "descr" in autnum:
1292 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
1293
1294 self.db.execute("INSERT INTO _organizations(handle, name, source) \
1295 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
1296 autnum.get("org"), autnum.get("descr"), source_key,
1297 )
1298 else:
1299 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
1300 autnum.get("asn"))
1301 return
1302
6ffd06b5 1303 # Insert into database
28b08385
PM
1304 self.db.execute("INSERT INTO _autnums(number, organization, source) \
1305 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
0365119d 1306 organization = excluded.organization",
28b08385 1307 autnum.get("asn"), autnum.get("org"), source_key,
6ffd06b5
MT
1308 )
1309
d4697973 1310 def _parse_inetnum_block(self, block, source_key, countries):
aadac4c5
PM
1311 inetnum = {}
1312 for line in block:
aadac4c5
PM
1313 # Split line
1314 key, val = split_line(line)
1315
84187ab5
PM
1316 # Filter any inetnum records which are only referring to IP space
1317 # not managed by that specific RIR...
1318 if key == "netname":
5254e5fc 1319 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
2ba6ed07 1320 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
84187ab5
PM
1321 return
1322
aadac4c5
PM
1323 if key == "inetnum":
1324 start_address, delim, end_address = val.partition("-")
1325
1326 # Strip any excess space
1327 start_address, end_address = start_address.rstrip(), end_address.strip()
1328
845da577
PM
1329 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
1330 if start_address and not (delim or end_address):
1331 try:
1332 start_address = ipaddress.ip_network(start_address, strict=False)
1333 except ValueError:
1334 start_address = start_address.split("/")
38d0faea 1335 ldigits = start_address[0].count(".")
845da577
PM
1336
1337 # How many octets do we need to add?
1338 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
2ad36187 1339 if ldigits == 1:
845da577 1340 start_address = start_address[0] + ".0.0/" + start_address[1]
2ad36187 1341 elif ldigits == 2:
845da577
PM
1342 start_address = start_address[0] + ".0/" + start_address[1]
1343 else:
1344 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
1345 return
1346
1347 try:
1348 start_address = ipaddress.ip_network(start_address, strict=False)
1349 except ValueError:
1350 log.warning("Could not parse line in LACNIC DB format: %s" % line)
1351 return
1352
1353 # Enumerate first and last IP address of this network
1354 end_address = start_address[-1]
1355 start_address = start_address[0]
1356
1357 else:
1358 # Convert to IP address
1359 try:
1360 start_address = ipaddress.ip_address(start_address)
1361 end_address = ipaddress.ip_address(end_address)
1362 except ValueError:
1363 log.warning("Could not parse line: %s" % line)
1364 return
aadac4c5 1365
1814283b 1366 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
aadac4c5
PM
1367
1368 elif key == "inet6num":
1814283b 1369 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
aadac4c5
PM
1370
1371 elif key == "country":
5487ee0d 1372 cc = val.upper()
b6b2b331 1373
30d32299 1374 # Ignore certain country codes
5487ee0d
MT
1375 if cc in IGNORED_COUNTRIES:
1376 log.debug("Ignoring country code '%s'" % cc)
30d32299
MT
1377 continue
1378
afc966db
MT
1379 # Translate country codes
1380 try:
5487ee0d 1381 cc = TRANSLATED_COUNTRIES[cc]
afc966db
MT
1382 except KeyError:
1383 pass
01e5f0ff 1384
a9e72406
MT
1385 # Do we know this country?
1386 if not cc in countries:
1387 log.warning("Skipping invalid country code '%s'" % cc)
1388 continue
1389
5487ee0d
MT
1390 try:
1391 inetnum[key].append(cc)
1392 except KeyError:
1393 inetnum[key] = [cc]
aadac4c5 1394
183b2f74
MT
1395 # Parse the geofeed attribute
1396 elif key == "geofeed":
1397 inetnum["geofeed"] = val
1398
1399 # Parse geofeed when used as a remark
45c0536c
MT
1400 elif key == "remarks":
1401 m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
183b2f74
MT
1402 if m:
1403 inetnum["geofeed"] = m.group(1)
1404
aadac4c5 1405 # Skip empty objects
39bfa052 1406 if not inetnum:
aadac4c5
PM
1407 return
1408
1814283b
PM
1409 # Iterate through all networks enumerated from above, check them for plausibility and insert
1410 # them into the database, if _check_parsed_network() succeeded
1411 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
39bfa052
MT
1412 if not self._check_parsed_network(single_network):
1413 continue
1414
1415 # Fetch the countries or use a list with an empty country
1416 countries = inetnum.get("country", [None])
1417
1418 # Insert the network into the database but only use the first country code
1419 for cc in countries:
1420 self.db.execute("""
1421 INSERT INTO
1422 _rirdata
1423 (
1424 network,
1425 country,
1426 original_countries,
1427 source
1428 )
1429 VALUES
1430 (
1431 %s, %s, %s, %s
1432 )
1433 ON CONFLICT (network)
1434 DO UPDATE SET country = excluded.country
1435 """, "%s" % single_network, cc, [cc for cc in countries if cc], source_key,
1814283b 1436 )
aadac4c5 1437
39bfa052
MT
1438 # If there are more than one country, we will only use the first one
1439 break
183b2f74 1440
39bfa052
MT
1441 # Update any geofeed information
1442 geofeed = inetnum.get("geofeed", None)
1443 if geofeed:
b8f3075b 1444 self._parse_geofeed(source_key, geofeed, single_network)
39bfa052 1445
b8f3075b 1446 def _parse_geofeed(self, source, url, single_network):
c60e31ea
MT
1447 # Check the URL
1448 url = self._check_geofeed_url(url)
1449 if not url:
da832d44
MT
1450 return
1451
da832d44
MT
1452 # Store/update any geofeeds
1453 self.db.execute("""
1454 INSERT INTO
b8f3075b
MT
1455 network_geofeeds
1456 (
1457 network,
1458 url,
1459 source
1460 )
1461 VALUES
1462 (
1463 %s, %s, %s
da832d44 1464 )
1042baa7
MT
1465 ON CONFLICT
1466 (
2b96ed27 1467 network, url
1042baa7
MT
1468 )
1469 DO UPDATE SET
1042baa7
MT
1470 source = excluded.source
1471 """, "%s" % single_network, url, source,
da832d44
MT
1472 )
1473
28b08385 1474 def _parse_org_block(self, block, source_key):
6ffd06b5
MT
1475 org = {}
1476 for line in block:
1477 # Split line
1478 key, val = split_line(line)
1479
e7d612e5
PM
1480 if key == "organisation":
1481 org[key] = val.upper()
1482 elif key == "org-name":
6ffd06b5
MT
1483 org[key] = val
1484
1485 # Skip empty objects
1486 if not org:
1487 return
1488
28b08385
PM
1489 self.db.execute("INSERT INTO _organizations(handle, name, source) \
1490 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
0365119d 1491 UPDATE SET name = excluded.name",
28b08385 1492 org.get("organisation"), org.get("org-name"), source_key,
6ffd06b5
MT
1493 )
1494
4b931f6c 1495 def _parse_line(self, line, source_key, validcountries=None):
429a43d1
MT
1496 # Skip version line
1497 if line.startswith("2"):
1498 return
6ffd06b5 1499
429a43d1
MT
1500 # Skip comments
1501 if line.startswith("#"):
1502 return
6ffd06b5 1503
429a43d1
MT
1504 try:
1505 registry, country_code, type, line = line.split("|", 3)
1506 except:
1507 log.warning("Could not parse line: %s" % line)
1508 return
6ffd06b5 1509
9a9b0698
MT
1510 # Skip ASN
1511 if type == "asn":
1512 return
1513
4b931f6c 1514 # Skip any unknown protocols
9a9b0698 1515 elif not type in ("ipv6", "ipv4"):
4b931f6c
MT
1516 log.warning("Unknown IP protocol '%s'" % type)
1517 return
1518
84b175e2
PM
1519 # Skip any lines that are for stats only or do not have a country
1520 # code at all (avoids log spam below)
1521 if not country_code or country_code == '*':
1522 return
1523
1524 # Skip objects with unknown country codes
1525 if validcountries and country_code not in validcountries:
1526 log.warning("Skipping line with bogus country '%s': %s" % \
1527 (country_code, line))
6ffd06b5
MT
1528 return
1529
429a43d1
MT
1530 try:
1531 address, prefix, date, status, organization = line.split("|")
1532 except ValueError:
1533 organization = None
1534
1535 # Try parsing the line without organization
1536 try:
1537 address, prefix, date, status = line.split("|")
1538 except ValueError:
1539 log.warning("Unhandled line format: %s" % line)
1540 return
1541
1542 # Skip anything that isn't properly assigned
1543 if not status in ("assigned", "allocated"):
1544 return
1545
1546 # Cast prefix into an integer
1547 try:
1548 prefix = int(prefix)
1549 except:
1550 log.warning("Invalid prefix: %s" % prefix)
7177031f 1551 return
429a43d1
MT
1552
1553 # Fix prefix length for IPv4
1554 if type == "ipv4":
1555 prefix = 32 - int(math.log(prefix, 2))
1556
1557 # Try to parse the address
1558 try:
1559 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
1560 except ValueError:
1561 log.warning("Invalid IP address: %s" % address)
1562 return
1563
bd341642
PM
1564 if not self._check_parsed_network(network):
1565 return
1566
4b931f6c
MT
1567 self.db.execute("""
1568 INSERT INTO
1569 networks
1570 (
1571 network,
1572 country,
1573 original_countries,
1574 source
1575 )
1576 VALUES
1577 (
1578 %s, %s, %s, %s
1579 )
1580 ON CONFLICT (network)
1581 DO UPDATE SET country = excluded.country
60f09cda 1582 """, "%s" % network, country_code, [country_code], source_key,
6ffd06b5
MT
1583 )
1584
98d9079b 1585 async def handle_update_announcements(self, ns):
83d61c46
MT
1586 server = ns.server[0]
1587
22d8d199
MT
1588 with self.db.transaction():
1589 if server.startswith("/"):
98d9079b 1590 await self._handle_update_announcements_from_bird(server)
22d8d199
MT
1591
1592 # Purge anything we never want here
1593 self.db.execute("""
1594 -- Delete default routes
1595 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
1596
1597 -- Delete anything that is not global unicast address space
1598 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
1599
1600 -- DELETE "current network" address space
1601 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
1602
1603 -- DELETE local loopback address space
1604 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
1605
1606 -- DELETE RFC 1918 address space
1607 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
1608 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
1609 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
1610
1611 -- DELETE test, benchmark and documentation address space
1612 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
1613 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
1614 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
1615 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
1616 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
1617
1618 -- DELETE CGNAT address space (RFC 6598)
1619 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
1620
1621 -- DELETE link local address space
1622 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
1623
b4d5b2a6 1624 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
22d8d199 1625 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
b4d5b2a6 1626 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
22d8d199
MT
1627
1628 -- DELETE multicast and reserved address space
1629 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
1630 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
1631
1632 -- Delete networks that are too small to be in the global routing table
1633 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
1634 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
1635
1636 -- Delete any non-public or reserved ASNs
1637 DELETE FROM announcements WHERE NOT (
1638 (autnum >= 1 AND autnum <= 23455)
1639 OR
1640 (autnum >= 23457 AND autnum <= 64495)
1641 OR
1642 (autnum >= 131072 AND autnum <= 4199999999)
1643 );
1644
1645 -- Delete everything that we have not seen for 14 days
1646 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
1647 """)
1648
98d9079b 1649 async def _handle_update_announcements_from_bird(self, server):
22d8d199 1650 # Pre-compile the regular expression for faster searching
35e5f041 1651 route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
22d8d199
MT
1652
1653 log.info("Requesting routing table from Bird (%s)" % server)
1654
35e5f041
MT
1655 aggregated_networks = []
1656
22d8d199
MT
1657 # Send command to list all routes
1658 for line in self._bird_cmd(server, "show route"):
1659 m = route.match(line)
1660 if not m:
b767db8d
MT
1661 # Skip empty lines
1662 if not line:
1663 pass
1664
45fa97cd 1665 # Ignore any header lines with the name of the routing table
b767db8d
MT
1666 elif line.startswith(b"Table"):
1667 pass
45fa97cd
MT
1668
1669 # Log anything else
b767db8d
MT
1670 else:
1671 log.debug("Could not parse line: %s" % line.decode())
1672
22d8d199
MT
1673 continue
1674
1675 # Fetch the extracted network and ASN
1676 network, autnum = m.groups()
1677
35e5f041
MT
1678 # Decode into strings
1679 if network:
1680 network = network.decode()
1681 if autnum:
1682 autnum = autnum.decode()
1683
1684 # Collect all aggregated networks
1685 if not autnum:
1686 log.debug("%s is an aggregated network" % network)
1687 aggregated_networks.append(network)
1688 continue
1689
22d8d199
MT
1690 # Insert it into the database
1691 self.db.execute("INSERT INTO announcements(network, autnum) \
1692 VALUES(%s, %s) ON CONFLICT (network) DO \
1693 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
35e5f041 1694 network, autnum,
22d8d199
MT
1695 )
1696
35e5f041
MT
1697 # Process any aggregated networks
1698 for network in aggregated_networks:
1699 log.debug("Processing aggregated network %s" % network)
1700
1701 # Run "show route all" for each network
1702 for line in self._bird_cmd(server, "show route %s all" % network):
1703 # Try finding the path
1704 m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1705 if m:
1706 # Select the last AS number in the path
1707 autnum = m.group(1).decode()
1708
1709 # Insert it into the database
1710 self.db.execute("INSERT INTO announcements(network, autnum) \
1711 VALUES(%s, %s) ON CONFLICT (network) DO \
1712 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1713 network, autnum,
1714 )
1715
1716 # We don't need to process any more
1717 break
1718
22d8d199
MT
1719 def _bird_cmd(self, socket_path, command):
1720 # Connect to the socket
1721 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1722 s.connect(socket_path)
cedee656 1723
22d8d199
MT
1724 # Allocate some buffer
1725 buffer = b""
83d61c46 1726
bbcefb38
MT
1727 log.debug("Sending Bird command: %s" % command)
1728
22d8d199
MT
1729 # Send the command
1730 s.send(b"%s\n" % command.encode())
209c04b6 1731
22d8d199
MT
1732 while True:
1733 # Fill up the buffer
1734 buffer += s.recv(4096)
209c04b6 1735
22d8d199
MT
1736 while True:
1737 # Search for the next newline
1738 pos = buffer.find(b"\n")
209c04b6 1739
22d8d199
MT
1740 # If we cannot find one, we go back and read more data
1741 if pos <= 0:
1742 break
209c04b6 1743
22d8d199
MT
1744 # Cut after the newline character
1745 pos += 1
b89cee80 1746
22d8d199
MT
1747 # Split the line we want and keep the rest in buffer
1748 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1749
bbcefb38
MT
1750 # Try parsing any status lines
1751 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1752 code, delim, line = int(line[:4]), line[4], line[5:]
1753
1754 log.debug("Received response code %s from bird" % code)
1755
1756 # End of output
1757 if code == 0:
1758 return
1759
1760 # Ignore hello line
1761 elif code == 1:
1762 continue
83d61c46 1763
22d8d199
MT
1764 # Otherwise return the line
1765 yield line
83d61c46 1766
98d9079b 1767 async def handle_update_geofeeds(self, ns):
ef8d1f67
MT
1768 # Sync geofeeds
1769 with self.db.transaction():
1770 # Delete all geofeeds which are no longer linked
1771 self.db.execute("""
1772 DELETE FROM
1773 geofeeds
1774 WHERE
3efe0297 1775 geofeeds.url NOT IN (
ef8d1f67 1776 SELECT
3efe0297 1777 network_geofeeds.url
ef8d1f67
MT
1778 FROM
1779 network_geofeeds
3efe0297
MT
1780 )
1781 """,
ef8d1f67
MT
1782 )
1783
1784 # Copy all geofeeds
1785 self.db.execute("""
3efe0297
MT
1786 WITH all_geofeeds AS (
1787 SELECT
1788 network_geofeeds.url
1789 FROM
1790 network_geofeeds
3efe0297 1791 )
ef8d1f67 1792 INSERT INTO
3efe0297
MT
1793 geofeeds
1794 (
1795 url
1796 )
ef8d1f67
MT
1797 SELECT
1798 url
1799 FROM
3efe0297 1800 all_geofeeds
ef8d1f67
MT
1801 ON CONFLICT (url)
1802 DO NOTHING
1803 """,
1804 )
1805
0fcdd689
MT
1806 # Fetch all Geofeeds that require an update
1807 geofeeds = self.db.query("""
1808 SELECT
1809 id,
1810 url
1811 FROM
1812 geofeeds
1813 WHERE
1814 updated_at IS NULL
1815 OR
1816 updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
1817 ORDER BY
1818 id
1819 """)
1820
98d9079b
MT
1821 ratelimiter = asyncio.Semaphore(32)
1822
430c51c2 1823 # Update all geofeeds
98d9079b
MT
1824 async with asyncio.TaskGroup() as tasks:
1825 for geofeed in geofeeds:
1826 task = tasks.create_task(
1827 self._fetch_geofeed(ratelimiter, geofeed),
1828 )
0fcdd689 1829
a4e50c7a
MT
1830 # Delete data from any feeds that did not update in the last two weeks
1831 with self.db.transaction():
1832 self.db.execute("""
1833 DELETE FROM
1834 geofeed_networks
1835 WHERE
1836 geofeed_networks.geofeed_id IN (
1837 SELECT
1838 geofeeds.id
1839 FROM
1840 geofeeds
1841 WHERE
1842 updated_at IS NULL
1843 OR
1844 updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
1845 )
1846 """)
1847
98d9079b
MT
1848 async def _fetch_geofeed(self, ratelimiter, geofeed):
1849 async with ratelimiter:
1850 log.debug("Fetching Geofeed %s" % geofeed.url)
0fcdd689 1851
98d9079b
MT
1852 with self.db.transaction():
1853 # Open the URL
1854 try:
1855 # Send the request
1856 f = await asyncio.to_thread(
776226b6
MT
1857 self.downloader.retrieve,
1858
1859 # Fetch the feed by its URL
1860 geofeed.url,
1861
1862 # Send some extra headers
98d9079b
MT
1863 headers={
1864 "User-Agent" : "location/%s" % location.__version__,
1865
1866 # We expect some plain text file in CSV format
1867 "Accept" : "text/csv, text/plain",
1868 },
1869
1870 # Don't wait longer than 10 seconds for a response
776226b6 1871 timeout=10,
98d9079b 1872 )
0fcdd689 1873
98d9079b
MT
1874 # Remove any previous data
1875 self.db.execute("DELETE FROM geofeed_networks \
1876 WHERE geofeed_id = %s", geofeed.id)
0fcdd689 1877
98d9079b 1878 lineno = 0
0fcdd689 1879
98d9079b 1880 # Read the output line by line
c86209e2
MT
1881 with self.db.pipeline():
1882 for line in f:
1883 lineno += 1
0fcdd689 1884
c86209e2
MT
1885 try:
1886 line = line.decode()
5e632485 1887
c86209e2
MT
1888 # Ignore any lines we cannot decode
1889 except UnicodeDecodeError:
1890 log.debug("Could not decode line %s in %s" \
1891 % (lineno, geofeed.url))
1892 continue
56f147ce 1893
c86209e2
MT
1894 # Strip any newline
1895 line = line.rstrip()
0fcdd689 1896
c86209e2
MT
1897 # Skip empty lines
1898 if not line:
1899 continue
0fcdd689 1900
c86209e2
MT
1901 # Skip comments
1902 elif line.startswith("#"):
1903 continue
990b3ad4 1904
c86209e2
MT
1905 # Try to parse the line
1906 try:
1907 fields = line.split(",", 5)
1908 except ValueError:
1909 log.debug("Could not parse line: %s" % line)
1910 continue
0fcdd689 1911
c86209e2
MT
1912 # Check if we have enough fields
1913 if len(fields) < 4:
1914 log.debug("Not enough fields in line: %s" % line)
1915 continue
0fcdd689 1916
c86209e2
MT
1917 # Fetch all fields
1918 network, country, region, city, = fields[:4]
0fcdd689 1919
c86209e2
MT
1920 # Try to parse the network
1921 try:
1922 network = ipaddress.ip_network(network, strict=False)
1923 except ValueError:
1924 log.debug("Could not parse network: %s" % network)
1925 continue
1926
1927 # Strip any excess whitespace from country codes
1928 country = country.strip()
1929
1930 # Make the country code uppercase
1931 country = country.upper()
1932
1933 # Check the country code
1934 if not country:
1935 log.debug("Empty country code in Geofeed %s line %s" \
1936 % (geofeed.url, lineno))
1937 continue
1938
1939 elif not location.country_code_is_valid(country):
1940 log.debug("Invalid country code in Geofeed %s:%s: %s" \
1941 % (geofeed.url, lineno, country))
1942 continue
1943
1944 # Write this into the database
1945 self.db.execute("""
1946 INSERT INTO
1947 geofeed_networks (
1948 geofeed_id,
1949 network,
1950 country,
1951 region,
1952 city
1953 )
1954 VALUES (%s, %s, %s, %s, %s)""",
1955 geofeed.id,
1956 "%s" % network,
1957 country,
1958 region,
1959 city,
1960 )
0fcdd689 1961
98d9079b
MT
1962 # Catch any HTTP errors
1963 except urllib.request.HTTPError as e:
1964 self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
1965 WHERE id = %s", e.code, "%s" % e, geofeed.id)
4ed16a14 1966
98d9079b
MT
1967 # Remove any previous data when the feed has been deleted
1968 if e.code == 404:
1969 self.db.execute("DELETE FROM geofeed_networks \
1970 WHERE geofeed_id = %s", geofeed.id)
72908243 1971
98d9079b
MT
1972 # Catch any other errors and connection timeouts
1973 except (http.client.InvalidURL, urllib.request.URLError, TimeoutError) as e:
1974 log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))
5e632485 1975
98d9079b
MT
1976 self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
1977 WHERE id = %s", 599, "%s" % e, geofeed.id)
0fcdd689 1978
98d9079b
MT
1979 # Mark the geofeed as updated
1980 else:
81b28603 1981 self.db.execute("""
98d9079b
MT
1982 UPDATE
1983 geofeeds
1984 SET
1985 updated_at = CURRENT_TIMESTAMP,
1986 status = NULL,
1987 error = NULL
1988 WHERE
1989 id = %s""",
81b28603 1990 geofeed.id,
81b28603 1991 )
0fcdd689 1992
98d9079b 1993 async def handle_update_overrides(self, ns):
d7fc3057 1994 with self.db.transaction():
667ad134
MT
1995 # Drop any previous content
1996 self.db.execute("TRUNCATE TABLE autnum_overrides")
1997 self.db.execute("TRUNCATE TABLE network_overrides")
d7fc3057 1998
f5a4557d
MT
1999 # Remove all Geofeeds
2000 self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", "overrides")
2001
d7fc3057
MT
2002 for file in ns.files:
2003 log.info("Reading %s..." % file)
2004
2005 with open(file, "rb") as f:
8c3529c7 2006 for type, block in read_blocks(f):
d7fc3057
MT
2007 if type == "net":
2008 network = block.get("net")
2009 # Try to parse and normalise the network
2010 try:
2011 network = ipaddress.ip_network(network, strict=False)
2012 except ValueError as e:
2013 log.warning("Invalid IP network: %s: %s" % (network, e))
2014 continue
2015
94dfab8c
MT
2016 # Prevent that we overwrite all networks
2017 if network.prefixlen == 0:
2018 log.warning("Skipping %s: You cannot overwrite default" % network)
2019 continue
2020
d7fc3057 2021 self.db.execute("""
667ad134
MT
2022 INSERT INTO
2023 network_overrides
2024 (
d7fc3057
MT
2025 network,
2026 country,
2027 is_anonymous_proxy,
2028 is_satellite_provider,
e17e804e
PM
2029 is_anycast,
2030 is_drop
667ad134
MT
2031 )
2032 VALUES
2033 (
2034 %s, %s, %s, %s, %s, %s
2035 )
2036 ON CONFLICT (network) DO NOTHING
2037 """,
d7fc3057
MT
2038 "%s" % network,
2039 block.get("country"),
28d29b7c
MT
2040 self._parse_bool(block, "is-anonymous-proxy"),
2041 self._parse_bool(block, "is-satellite-provider"),
2042 self._parse_bool(block, "is-anycast"),
e17e804e 2043 self._parse_bool(block, "drop"),
d7fc3057
MT
2044 )
2045
f476cdfd
MT
2046 elif type == "aut-num":
2047 autnum = block.get("aut-num")
d7fc3057
MT
2048
2049 # Check if AS number begins with "AS"
2050 if not autnum.startswith("AS"):
2051 log.warning("Invalid AS number: %s" % autnum)
2052 continue
2053
2054 # Strip "AS"
2055 autnum = autnum[2:]
2056
2057 self.db.execute("""
667ad134
MT
2058 INSERT INTO
2059 autnum_overrides
2060 (
d7fc3057
MT
2061 number,
2062 name,
bd1aa6a1 2063 country,
d7fc3057
MT
2064 is_anonymous_proxy,
2065 is_satellite_provider,
e17e804e
PM
2066 is_anycast,
2067 is_drop
667ad134
MT
2068 )
2069 VALUES
2070 (
2071 %s, %s, %s, %s, %s, %s, %s
2072 )
2073 ON CONFLICT (number) DO NOTHING
2074 """,
bd1aa6a1
MT
2075 autnum,
2076 block.get("name"),
2077 block.get("country"),
28d29b7c
MT
2078 self._parse_bool(block, "is-anonymous-proxy"),
2079 self._parse_bool(block, "is-satellite-provider"),
2080 self._parse_bool(block, "is-anycast"),
e17e804e 2081 self._parse_bool(block, "drop"),
d7fc3057
MT
2082 )
2083
3efe0297
MT
2084 # Geofeeds
2085 elif type == "geofeed":
f5a4557d
MT
2086 networks = []
2087
2088 # Fetch the URL
3efe0297
MT
2089 url = block.get("geofeed")
2090
f5a4557d
MT
2091 # Fetch permitted networks
2092 for n in block.get("network", []):
2093 try:
2094 n = ipaddress.ip_network(n)
2095 except ValueError as e:
2096 log.warning("Ignoring invalid network %s: %s" % (n, e))
2097 continue
2098
2099 networks.append(n)
2100
2101 # Log a warning if not networks have been permitted
2102 if not networks:
2103 log.warning("Geofeed %s is not permitted for any networks. Ignoring." % url)
2104 continue
2105
c60e31ea
MT
2106 # Check the URL
2107 url = self._check_geofeed_url(url)
2108 if not url:
2aeec4ce
MT
2109 continue
2110
f5a4557d 2111 # Store the Geofeed URL
3efe0297
MT
2112 self.db.execute("""
2113 INSERT INTO
f5a4557d 2114 geofeeds
3efe0297
MT
2115 (
2116 url
2117 )
2118 VALUES
2119 (
2120 %s
2121 )
2122 ON CONFLICT (url) DO NOTHING
2123 """, url,
2124 )
2125
f5a4557d
MT
2126 # Store all permitted networks
2127 self.db.executemany("""
2128 INSERT INTO
2129 network_geofeeds
2130 (
2131 network,
2132 url,
2133 source
2134 )
2135 VALUES
2136 (
2137 %s, %s, %s
2138 )
1042baa7
MT
2139 ON CONFLICT
2140 (
2b96ed27 2141 network, url
1042baa7
MT
2142 )
2143 DO UPDATE SET
1042baa7 2144 source = excluded.source
f5a4557d
MT
2145 """, (("%s" % n, url, "overrides") for n in networks),
2146 )
2147
d7fc3057 2148 else:
03d24a9b 2149 log.warning("Unsupported type: %s" % type)
d7fc3057 2150
98d9079b 2151 async def handle_update_feeds(self, ns):
93aad7f7
MT
2152 """
2153 Update any third-party feeds
2154 """
a775372b
MT
2155 success = True
2156
a775372b
MT
2157 feeds = (
2158 # AWS IP Ranges
2159 ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
377cead4
MT
2160
2161 # Spamhaus DROP
2162 ("SPAMHAUS-DROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
2163 ("SPAMHAUS-EDROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
2164 ("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),
5dcda5c8
MT
2165
2166 # Spamhaus ASNDROP
2167 ("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
a775372b 2168 )
659c2810
MT
2169
2170 # Drop any data from feeds that we don't support (any more)
2171 with self.db.transaction():
2172 # Fetch the names of all feeds we support
2173 sources = [name for name, *rest in feeds]
2174
2175 self.db.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources)
2176 self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)
a775372b
MT
2177
2178 # Walk through all feeds
2179 for name, callback, url, *args in feeds:
f7f8e714
MT
2180 # Skip any feeds that were not requested on the command line
2181 if ns.feeds and not name in ns.feeds:
2182 continue
2183
a775372b 2184 try:
98d9079b 2185 await self._process_feed(name, callback, url, *args)
a775372b
MT
2186
2187 # Log an error but continue if an exception occurs
2188 except Exception as e:
2189 log.error("Error processing feed '%s': %s" % (name, e))
2190 success = False
93aad7f7 2191
a775372b
MT
2192 # Return status
2193 return 0 if success else 1
dcef2ba4 2194
98d9079b 2195 async def _process_feed(self, name, callback, url, *args):
a775372b
MT
2196 """
2197 Processes one feed
2198 """
2199 # Open the URL
335946c0 2200 f = self.downloader.retrieve(url)
85e44d9a 2201
a775372b
MT
2202 with self.db.transaction():
2203 # Drop any previous content
2204 self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
2205 self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
dcef2ba4 2206
a775372b 2207 # Call the callback to process the feed
c86209e2
MT
2208 with self.db.pipeline():
2209 return await callback(name, f, *args)
a775372b 2210
98d9079b 2211 async def _import_aws_ip_ranges(self, name, f):
a775372b 2212 # Parse the feed
c30e421e 2213 feed = json.load(f)
68fdeb3b 2214
99e4a4eb 2215 # Set up a dictionary for mapping a region name to a country. Unfortunately,
dcef2ba4
PM
2216 # there seems to be no machine-readable version available of this other than
2217 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
2218 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
2219 # was helpful here as well.
2220 aws_region_country_map = {
99e4a4eb
MT
2221 # Africa
2222 "af-south-1" : "ZA",
2223
2224 # Asia
2225 "il-central-1" : "IL", # Tel Aviv
2226
2227 # Asia/Pacific
2228 "ap-northeast-1" : "JP",
2229 "ap-northeast-2" : "KR",
2230 "ap-northeast-3" : "JP",
2231 "ap-east-1" : "HK",
2232 "ap-south-1" : "IN",
2233 "ap-south-2" : "IN",
2234 "ap-southeast-1" : "SG",
2235 "ap-southeast-2" : "AU",
2236 "ap-southeast-3" : "MY",
2237 "ap-southeast-4" : "AU",
2238 "ap-southeast-5" : "NZ", # Auckland, NZ
2239 "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
2240
2241 # Canada
2242 "ca-central-1" : "CA",
2243 "ca-west-1" : "CA",
2244
2245 # Europe
2246 "eu-central-1" : "DE",
2247 "eu-central-2" : "CH",
2248 "eu-north-1" : "SE",
2249 "eu-west-1" : "IE",
2250 "eu-west-2" : "GB",
2251 "eu-west-3" : "FR",
2252 "eu-south-1" : "IT",
2253 "eu-south-2" : "ES",
2254
2255 # Middle East
2256 "me-central-1" : "AE",
2257 "me-south-1" : "BH",
2258
2259 # South America
2260 "sa-east-1" : "BR",
2261
2262 # Undocumented, likely located in Berlin rather than Frankfurt
2263 "eusc-de-east-1" : "DE",
2264 }
dcef2ba4 2265
c30e421e
MT
2266 # Collect a list of all networks
2267 prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])
dcef2ba4 2268
c30e421e
MT
2269 for prefix in prefixes:
2270 # Fetch network
2271 network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")
2272
2273 # Parse the network
a775372b 2274 try:
c30e421e
MT
2275 network = ipaddress.ip_network(network)
2276 except ValuleError as e:
2277 log.warning("%s: Unable to parse prefix %s" % (name, network))
a775372b 2278 continue
dcef2ba4 2279
a775372b
MT
2280 # Sanitize parsed networks...
2281 if not self._check_parsed_network(network):
2282 continue
dcef2ba4 2283
c30e421e
MT
2284 # Fetch the region
2285 region = prefix.get("region")
2286
2287 # Set some defaults
a775372b
MT
2288 cc = None
2289 is_anycast = False
2290
c30e421e
MT
2291 # Fetch the CC from the dictionary
2292 try:
a775372b 2293 cc = aws_region_country_map[region]
dcef2ba4 2294
c30e421e
MT
2295 # If we couldn't find anything, let's try something else...
2296 except KeyError as e:
2297 # Find anycast networks
2298 if region == "GLOBAL":
2299 is_anycast = True
2300
2301 # Everything that starts with us- is probably in the United States
2302 elif region.startswith("us-"):
2303 cc = "US"
2304
2305 # Everything that starts with cn- is probably China
2306 elif region.startswith("cn-"):
2307 cc = "CN"
2308
2309 # Log a warning for anything else
2310 else:
2311 log.warning("%s: Could not determine country code for AWS region %s" \
2312 % (name, region))
2313 continue
dcef2ba4 2314
c30e421e 2315 # Write to database
a775372b
MT
2316 self.db.execute("""
2317 INSERT INTO
2318 network_feeds
2319 (
2320 network,
2321 source,
2322 country,
2323 is_anycast
dcef2ba4 2324 )
a775372b
MT
2325 VALUES
2326 (
2327 %s, %s, %s, %s
2328 )
2329 ON CONFLICT (network, source) DO NOTHING
c30e421e 2330 """, "%s" % network, name, cc, is_anycast,
a775372b 2331 )
dcef2ba4 2332
98d9079b 2333 async def _import_spamhaus_drop(self, name, f):
377cead4
MT
2334 """
2335 Import Spamhaus DROP IP feeds
2336 """
2337 # Count all lines
2338 lines = 0
69b3d894 2339
377cead4
MT
2340 # Walk through all lines
2341 for line in f:
2342 # Decode line
2343 line = line.decode("utf-8")
69b3d894 2344
377cead4
MT
2345 # Strip off any comments
2346 line, _, comment = line.partition(";")
69b3d894 2347
377cead4
MT
2348 # Ignore empty lines
2349 if not line:
2350 continue
85e44d9a 2351
377cead4
MT
2352 # Strip any excess whitespace
2353 line = line.strip()
69b3d894 2354
377cead4
MT
2355 # Increment line counter
2356 lines += 1
5acac2a4 2357
377cead4
MT
2358 # Parse the network
2359 try:
2360 network = ipaddress.ip_network(line)
2361 except ValueError as e:
2362 log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
2363 continue
69b3d894 2364
377cead4
MT
2365 # Check network
2366 if not self._check_parsed_network(network):
2367 log.warning("%s: Skipping bogus network: %s" % (name, network))
2368 continue
69b3d894 2369
377cead4
MT
2370 # Insert into the database
2371 self.db.execute("""
2372 INSERT INTO
2373 network_feeds
2374 (
2375 network,
2376 source,
2377 is_drop
2378 )
2379 VALUES
2380 (
2381 %s, %s, %s
2382 )""", "%s" % network, name, True,
2383 )
69b3d894 2384
377cead4
MT
2385 # Raise an exception if we could not import anything
2386 if not lines:
2387 raise RuntimeError("Received bogus feed %s with no data" % name)
69b3d894 2388
98d9079b 2389 async def _import_spamhaus_asndrop(self, name, f):
5dcda5c8
MT
2390 """
2391 Import Spamhaus ASNDROP feed
2392 """
2393 for line in f:
2394 # Decode the line
2395 line = line.decode("utf-8")
5acac2a4 2396
5dcda5c8
MT
2397 # Parse JSON
2398 try:
2399 line = json.loads(line)
2400 except json.JSONDecodeError as e:
2401 log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
2402 continue
69b3d894 2403
5dcda5c8
MT
2404 # Fetch type
2405 type = line.get("type")
69b3d894 2406
5dcda5c8
MT
2407 # Skip any metadata
2408 if type == "metadata":
2409 continue
69b3d894 2410
5dcda5c8
MT
2411 # Fetch ASN
2412 asn = line.get("asn")
69b3d894 2413
5dcda5c8
MT
2414 # Skip any lines without an ASN
2415 if not asn:
2416 continue
69b3d894 2417
5dcda5c8
MT
2418 # Filter invalid ASNs
2419 if not self._check_parsed_asn(asn):
2420 log.warning("%s: Skipping bogus ASN %s" % (name, asn))
2421 continue
69b3d894 2422
5dcda5c8
MT
2423 # Write to database
2424 self.db.execute("""
2425 INSERT INTO
2426 autnum_feeds
2427 (
2428 number,
2429 source,
2430 is_drop
2431 )
2432 VALUES
2433 (
2434 %s, %s, %s
2435 )""", "%s" % asn, name, True,
2436 )
69b3d894 2437
28d29b7c
MT
2438 @staticmethod
2439 def _parse_bool(block, key):
2440 val = block.get(key)
2441
2442 # There is no point to proceed when we got None
2443 if val is None:
2444 return
2445
2446 # Convert to lowercase
2447 val = val.lower()
2448
2449 # True
2450 if val in ("yes", "1"):
2451 return True
2452
2453 # False
2454 if val in ("no", "0"):
2455 return False
2456
2457 # Default to None
2458 return None
2459
98d9079b 2460 async def handle_import_countries(self, ns):
8084b33a
MT
2461 with self.db.transaction():
2462 # Drop all data that we have
2463 self.db.execute("TRUNCATE TABLE countries")
2464
2465 for file in ns.file:
2466 for line in file:
2467 line = line.rstrip()
2468
2469 # Ignore any comments
2470 if line.startswith("#"):
2471 continue
2472
2473 try:
2474 country_code, continent_code, name = line.split(maxsplit=2)
2475 except:
2476 log.warning("Could not parse line: %s" % line)
2477 continue
2478
2479 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
2480 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
2481
6ffd06b5
MT
2482
def split_line(line):
	"""
		Splits a "key: value" line into a (key, value) tuple, trimming
		surrounding whitespace from both parts; value is "" when there
		is no colon
	"""
	key, colon, value = line.partition(":")

	return key.strip(), value.strip()
78ff0cf2 2491
8c3529c7
MT
def read_blocks(f):
	"""
		Reads the file object block-wise and yields (type, data) tuples

		The key of a block's first line becomes its type; data maps keys to
		their stripped values. The "network" keys of geofeed blocks may occur
		multiple times and are therefore collected into a list.
	"""
	for block in iterate_over_blocks(f):
		type = None
		data = {}

		for index, line in enumerate(block):
			key, value = line.split(":", 1)

			# Whatever key the block opens with becomes its type
			if not index:
				type = key

			# Values carry no surrounding whitespace
			value = value.strip()

			# Geofeed blocks may name several networks, so collect those in a list
			if type == "geofeed" and key == "network":
				data.setdefault(key, []).append(value)

			# Everything else is stored as a plain string
			else:
				data[key] = value

		yield type, data
2519
2520def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
2521 block = []
2522
2523 for line in f:
2524 # Skip commented lines
2525 if line.startswith(b"#") or line.startswith(b"%"):
2526 continue
2527
2528 # Convert to string
2529 for charset in charsets:
2530 try:
2531 line = line.decode(charset)
2532 except UnicodeDecodeError:
2533 continue
2534 else:
2535 break
2536
2537 # Remove any comments at the end of line
2538 line, hash, comment = line.partition("#")
2539
2540 # Strip any whitespace at the end of the line
2541 line = line.rstrip()
2542
2543 # If we cut off some comment and the line is empty, we can skip it
2544 if comment and not line:
2545 continue
2546
2547 # If the line has some content, keep collecting it
2548 if line:
2549 block.append(line)
2550 continue
2551
2552 # End the block on an empty line
2553 if block:
2554 yield block
2555
2556 # Reset the block
2557 block = []
2558
2559 # Return the last block
2560 if block:
2561 yield block
2562
def iterate_over_lines(f):
	"""
		Yields every line of f decoded into a string with any trailing
		whitespace removed
	"""
	yield from (raw.decode().rstrip() for raw in f)
2570
async def main():
	"""
		Entry point: builds the command line interface and runs it
	"""
	cli = CLI()

	await cli.run()

asyncio.run(main())