#!/usr/bin/python3
###############################################################################
#                                                                             #
# libloc - A library to determine the location of someone on the Internet    #
#                                                                             #
# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org>           #
#                                                                             #
# This library is free software; you can redistribute it and/or              #
# modify it under the terms of the GNU Lesser General Public                 #
# License as published by the Free Software Foundation; either               #
# version 2.1 of the License, or (at your option) any later version.         #
#                                                                             #
# This library is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU           #
# Lesser General Public License for more details.                            #
#                                                                             #
###############################################################################

import argparse
import asyncio
import csv
import functools
import http.client
import io
import ipaddress
import json
import logging
import math
import re
import socket
import sys
import urllib.error
import urllib.parse

# Load our location module
import location
import location.database
from location.downloader import Downloader
from location.i18n import _

# Initialise logging
log = logging.getLogger("location.importer")
log.propagate = 1

# Define constants
VALID_ASN_RANGES = (
	(1, 23455),
	(23457, 64495),
	(131072, 4199999999),
)

TRANSLATED_COUNTRIES = {
	# When people say UK, they mean GB
	"UK" : "GB",
}

IGNORED_COUNTRIES = set((
	# Formerly Yugoslavia
	"YU",

	# Some people use ZZ to say "no country" or to hide the country
	"ZZ",
))

# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")

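# A sketch of the rows expected from ARIN's asns.csv (the column names are
# those consumed by _import_arin_as_names() below; the values here are
# hypothetical):
#
#   "Start AS Number","End AS Number","Org Name","Status"
#   "64496","64511","Example Org","Full Registry Services"
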
class CLI(object):
	def parse_cli(self):
		parser = argparse.ArgumentParser(
			description=_("Location Importer Command Line Interface"),
		)
		subparsers = parser.add_subparsers()

		# Global configuration flags
		parser.add_argument("--debug", action="store_true",
			help=_("Enable debug output"))
		parser.add_argument("--quiet", action="store_true",
			help=_("Enable quiet mode"))

		# version
		parser.add_argument("--version", action="version",
			version="%(prog)s @VERSION@")

		# Database
		parser.add_argument("--database-host", required=True,
			help=_("Database Hostname"), metavar=_("HOST"))
		parser.add_argument("--database-name", required=True,
			help=_("Database Name"), metavar=_("NAME"))
		parser.add_argument("--database-username", required=True,
			help=_("Database Username"), metavar=_("USERNAME"))
		parser.add_argument("--database-password", required=True,
			help=_("Database Password"), metavar=_("PASSWORD"))

		# Write Database
		write = subparsers.add_parser("write", help=_("Write database to file"))
		write.set_defaults(func=self.handle_write)
		write.add_argument("file", nargs=1, help=_("Database File"))
		write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
		write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
		write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
		write.add_argument("--description", nargs="?", help=_("Sets a description"))
		write.add_argument("--license", nargs="?", help=_("Sets the license"))
		write.add_argument("--version", type=int, help=_("Database Format Version"))

		# Update WHOIS
		update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
		update_whois.add_argument("sources", nargs="*",
			help=_("Only update these sources"))
		update_whois.set_defaults(func=self.handle_update_whois)

		# Update announcements
		update_announcements = subparsers.add_parser("update-announcements",
			help=_("Update BGP Announcements"))
		update_announcements.set_defaults(func=self.handle_update_announcements)
		update_announcements.add_argument("server", nargs=1,
			help=_("Route Server to connect to"), metavar=_("SERVER"))

		# Update geofeeds
		update_geofeeds = subparsers.add_parser("update-geofeeds",
			help=_("Update Geofeeds"))
		update_geofeeds.set_defaults(func=self.handle_update_geofeeds)

		# Update feeds
		update_feeds = subparsers.add_parser("update-feeds",
			help=_("Update Feeds"))
		update_feeds.add_argument("feeds", nargs="*",
			help=_("Only update these feeds"))
		update_feeds.set_defaults(func=self.handle_update_feeds)

		# Update overrides
		update_overrides = subparsers.add_parser("update-overrides",
			help=_("Update overrides"),
		)
		update_overrides.add_argument(
			"files", nargs="+", help=_("Files to import"),
		)
		update_overrides.set_defaults(func=self.handle_update_overrides)

		# Import countries
		import_countries = subparsers.add_parser("import-countries",
			help=_("Import countries"),
		)
		import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
			help=_("File to import"))
		import_countries.set_defaults(func=self.handle_import_countries)

		args = parser.parse_args()

		# Configure logging
		if args.debug:
			location.logger.set_level(logging.DEBUG)
		elif args.quiet:
			location.logger.set_level(logging.WARNING)

		# Print usage if no action was given
		if "func" not in args:
			parser.print_usage()
			sys.exit(2)

		return args

	async def run(self):
		# Parse command line arguments
		args = self.parse_cli()

		# Initialize the downloader
		self.downloader = Downloader()

		# Initialise database
		self.db = self._setup_database(args)

		# Call function
		ret = await args.func(args)

		# Return with exit code
		if ret:
			sys.exit(ret)

		# Otherwise just exit
		sys.exit(0)

	def _setup_database(self, ns):
		"""
			Initialise the database
		"""
		# Connect to database
		db = location.database.Connection(
			host=ns.database_host, database=ns.database_name,
			user=ns.database_username, password=ns.database_password,
		)

		with db.transaction():
			db.execute("""
				-- announcements
				CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
					first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
					last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
				CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
				CREATE INDEX IF NOT EXISTS announcements_search2 ON announcements
					USING SPGIST(network inet_ops);

				-- autnums
				CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
				ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

				-- countries
				CREATE TABLE IF NOT EXISTS countries(
					country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

				-- networks
				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
				CREATE INDEX IF NOT EXISTS networks_search2 ON networks
					USING SPGIST(network inet_ops);

				-- geofeeds
				CREATE TABLE IF NOT EXISTS geofeeds(
					id serial primary key,
					url text,
					status integer default null,
					updated_at timestamp without time zone default null
				);
				ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
				CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
					ON geofeeds(url);
				CREATE TABLE IF NOT EXISTS geofeed_networks(
					geofeed_id integer references geofeeds(id) on delete cascade,
					network inet,
					country text,
					region text,
					city text
				);
				CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
					ON geofeed_networks(geofeed_id);
				CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
				ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL;
				CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique2
					ON network_geofeeds(network, url);
				CREATE INDEX IF NOT EXISTS network_geofeeds_url
					ON network_geofeeds(url);

				-- feeds
				CREATE TABLE IF NOT EXISTS autnum_feeds(
					number bigint NOT NULL,
					source text NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
					ON autnum_feeds(number, source);

				CREATE TABLE IF NOT EXISTS network_feeds(
					network inet NOT NULL,
					source text NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
					ON network_feeds(network, source);

				-- overrides
				CREATE TABLE IF NOT EXISTS autnum_overrides(
					number bigint NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
					ON autnum_overrides(number);
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
				ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;

				CREATE TABLE IF NOT EXISTS network_overrides(
					network inet NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
					ON network_overrides(network);
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
				ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;

				-- Cleanup things we no longer need
				DROP TABLE IF EXISTS geofeed_overrides;
				DROP INDEX IF EXISTS announcements_family;
				DROP INDEX IF EXISTS announcements_search;
				DROP INDEX IF EXISTS geofeed_networks_search;
				DROP INDEX IF EXISTS networks_family;
				DROP INDEX IF EXISTS networks_search;
				DROP INDEX IF EXISTS network_feeds_search;
				DROP INDEX IF EXISTS network_geofeeds_unique;
				DROP INDEX IF EXISTS network_geofeeds_search;
				DROP INDEX IF EXISTS network_overrides_search;
			""")

		return db

	def fetch_countries(self):
		"""
			Returns a set of all country codes that are in the database
		"""
		# Fetch all valid country codes to check parsed networks against...
		countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")

		return set((country.country_code for country in countries))

	async def handle_write(self, ns):
		"""
			Compiles a database in libloc format out of what is in the database
		"""
		# Allocate a writer
		writer = location.Writer(ns.signing_key, ns.backup_signing_key)

		# Set all metadata
		if ns.vendor:
			writer.vendor = ns.vendor

		if ns.description:
			writer.description = ns.description

		if ns.license:
			writer.license = ns.license

		# Run ANALYZE so that the query planner can hopefully make better decisions
		self.db.execute("ANALYZE")

		# Add all Autonomous Systems
		log.info("Writing Autonomous Systems...")

		# Select all ASes with a name
		rows = self.db.query("""
			SELECT
				autnums.number AS number,
				COALESCE(
					overrides.name,
					autnums.name
				) AS name
			FROM
				autnums
			LEFT JOIN
				autnum_overrides overrides ON autnums.number = overrides.number
			ORDER BY
				autnums.number
			""")

		for row in rows:
			# Skip AS without names
			if not row.name:
				continue

			a = writer.add_as(row.number)
			a.name = row.name

		# Add all networks
		log.info("Writing networks...")

		# Create a new temporary table where we collect
		# the networks that we are interested in
		# (FILLFACTOR = 50 leaves room on each page for the
		# repeated UPDATEs this table receives below)
		self.db.execute("""
			CREATE TEMPORARY TABLE
				n
			(
				network inet NOT NULL,
				autnum integer,
				country text,
				is_anonymous_proxy boolean,
				is_satellite_provider boolean,
				is_anycast boolean,
				is_drop boolean
			)
			WITH (FILLFACTOR = 50)
		""")

		# Add all known networks
		self.db.execute("""
			INSERT INTO
				n
			(
				network
			)

			SELECT
				network
			FROM
				announcements

			UNION

			SELECT
				network
			FROM
				networks

			UNION

			SELECT
				network
			FROM
				network_feeds

			UNION

			SELECT
				network
			FROM
				network_overrides

			UNION

			SELECT
				network
			FROM
				geofeed_networks
		""")

		# Create an index to search through networks faster
		self.db.execute("""
			CREATE INDEX
				n_search
			ON
				n
			USING
				SPGIST(network)
		""")

		# Analyze n
		self.db.execute("ANALYZE n")

		# Apply the AS number to all networks
		self.db.execute("""
			-- Join all networks together with their most specific announcements
			WITH announcements AS (
				SELECT
					n.network,
					announcements.autnum,

					-- Sort all matches and number them so
					-- that we can later select the best one
					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(announcements.network) DESC
					) AS row
				FROM
					n
				JOIN
					announcements
				ON
					announcements.network >>= n.network
			)

			-- Store the result
			UPDATE
				n
			SET
				autnum = announcements.autnum
			FROM
				announcements
			WHERE
				announcements.network = n.network
			AND
				announcements.row = 1
			""",
		)

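		# For example: if n contains 10.1.2.0/24 while the announcements
		# table holds 10.0.0.0/8 (AS64496) and 10.1.0.0/16 (AS64497), both
		# match via >>=, but ORDER BY masklen(...) DESC ranks the /16 first,
		# so the /24 is attributed to AS64497 - the most specific announcement
		# wins. (Hypothetical values, for illustration only.)
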
		# Apply country information
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					networks.country,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(networks.network) DESC
					) AS row
				FROM
					n
				JOIN
					networks
				ON
					networks.network >>= n.network
			)

			UPDATE
				n
			SET
				country = networks.country
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
			""",
		)

		# Add all country information from Geofeeds
		self.db.execute("""
			WITH geofeeds AS (
				SELECT
					DISTINCT ON (geofeed_networks.network)
					geofeed_networks.network,
					geofeed_networks.country
				FROM
					geofeeds
				JOIN
					network_geofeeds networks
				ON
					geofeeds.url = networks.url
				JOIN
					geofeed_networks
				ON
					geofeeds.id = geofeed_networks.geofeed_id
				AND
					networks.network >>= geofeed_networks.network
			),

			networks AS (
				SELECT
					n.network,
					geofeeds.country,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(geofeeds.network) DESC
					) AS row
				FROM
					n
				JOIN
					geofeeds
				ON
					geofeeds.network >>= n.network
			)

			UPDATE
				n
			SET
				country = networks.country
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
			""",
		)

		# Apply country and flags from feeds
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					network_feeds.country,

					-- Flags
					network_feeds.is_anonymous_proxy,
					network_feeds.is_satellite_provider,
					network_feeds.is_anycast,
					network_feeds.is_drop,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(network_feeds.network) DESC
					) AS row
				FROM
					n
				JOIN
					network_feeds
				ON
					network_feeds.network >>= n.network
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
			""",
		)

		# Apply country and flags from AS feeds
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					autnum_feeds.country,

					-- Flags
					autnum_feeds.is_anonymous_proxy,
					autnum_feeds.is_satellite_provider,
					autnum_feeds.is_anycast,
					autnum_feeds.is_drop
				FROM
					n
				JOIN
					autnum_feeds
				ON
					autnum_feeds.number = n.autnum
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
		""")

		# Apply network overrides
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					network_overrides.country,

					-- Flags
					network_overrides.is_anonymous_proxy,
					network_overrides.is_satellite_provider,
					network_overrides.is_anycast,
					network_overrides.is_drop,

					ROW_NUMBER()
					OVER
					(
						PARTITION BY
							n.network
						ORDER BY
							masklen(network_overrides.network) DESC
					) AS row
				FROM
					n
				JOIN
					network_overrides
				ON
					network_overrides.network >>= n.network
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
			AND
				networks.row = 1
		""")

		# Apply AS overrides
		self.db.execute("""
			WITH networks AS (
				SELECT
					n.network,
					autnum_overrides.country,

					-- Flags
					autnum_overrides.is_anonymous_proxy,
					autnum_overrides.is_satellite_provider,
					autnum_overrides.is_anycast,
					autnum_overrides.is_drop
				FROM
					n
				JOIN
					autnum_overrides
				ON
					autnum_overrides.number = n.autnum
			)

			UPDATE
				n
			SET
				country =
					COALESCE(networks.country, n.country),

				is_anonymous_proxy =
					COALESCE(networks.is_anonymous_proxy, n.is_anonymous_proxy),

				is_satellite_provider =
					COALESCE(networks.is_satellite_provider, n.is_satellite_provider),

				is_anycast =
					COALESCE(networks.is_anycast, n.is_anycast),

				is_drop =
					COALESCE(networks.is_drop, n.is_drop)
			FROM
				networks
			WHERE
				networks.network = n.network
		""")

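		# Note the precedence established by the UPDATE cascade above: each
		# step overwrites earlier values whenever it has a non-NULL value of
		# its own (COALESCE keeps the old value otherwise), so overrides beat
		# feeds, feeds beat Geofeeds, and Geofeeds beat plain RIR data.
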
		# Here we could remove networks that we no longer need, but since we
		# have already implemented our deduplication/merge algorithm, this
		# is not necessary.

		# Export the entire temporary table
		rows = self.db.query("""
			SELECT
				*
			FROM
				n
			ORDER BY
				network
		""")

		for row in rows:
			network = writer.add_network("%s" % row.network)

			# Save country
			if row.country:
				network.country_code = row.country

			# Save ASN
			if row.autnum:
				network.asn = row.autnum

			# Set flags
			if row.is_anonymous_proxy:
				network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

			if row.is_satellite_provider:
				network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

			if row.is_anycast:
				network.set_flag(location.NETWORK_FLAG_ANYCAST)

			if row.is_drop:
				network.set_flag(location.NETWORK_FLAG_DROP)

		# Add all countries
		log.info("Writing countries...")

		# Select all countries
		rows = self.db.query("""
			SELECT
				*
			FROM
				countries
			ORDER BY
				country_code
			""",
		)

		for row in rows:
			c = writer.add_country(row.country_code)
			c.continent_code = row.continent_code
			c.name = row.name

		# Write everything to file
		log.info("Writing database to file...")
		for file in ns.file:
			writer.write(file)

	async def handle_update_whois(self, ns):
		# Did we run successfully?
		success = True

		sources = (
			# African Network Information Centre
			("AFRINIC", (
				(self._import_standard_format, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
			)),

			# Asia Pacific Network Information Centre
			("APNIC", (
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
				(self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
			)),

			# American Registry for Internet Numbers
			("ARIN", (
				(self._import_extended_format, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
				(self._import_arin_as_names, "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
			)),

			# Japan Network Information Center
			("JPNIC", (
				(self._import_standard_format, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
			)),

			# Latin America and Caribbean Network Information Centre
			("LACNIC", (
				(self._import_standard_format, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
				(self._import_extended_format, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
			)),

			# Réseaux IP Européens
			("RIPE", (
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
				(self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
			)),
		)

		# Fetch all valid country codes to check parsed networks against
		countries = self.fetch_countries()

		# Check if we have countries
		if not countries:
			log.error("Please import countries before importing any WHOIS data")
			return 1

		# Iterate over all potential sources
		for name, feeds in sources:
			# Skip anything that should not be updated
			if ns.sources and name not in ns.sources:
				continue

			try:
				await self._process_source(name, feeds, countries)

			# Log an error but continue if an exception occurs
			except Exception as e:
				log.error("Error processing source %s" % name, exc_info=True)
				success = False

		# Return a non-zero exit code for errors
		return 0 if success else 1

	async def _process_source(self, source, feeds, countries):
		"""
			This function processes one source
		"""
		# Wrap everything into one large transaction
		with self.db.transaction():
			# Remove all previously imported content
			self.db.execute("DELETE FROM autnums WHERE source = %s", source)
			self.db.execute("DELETE FROM networks WHERE source = %s", source)
			self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", source)

			# Create some temporary tables to store parsed data
			self.db.execute("""
				CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
					organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
				CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

				CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
					name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
					original_countries text[] NOT NULL, source text NOT NULL)
					ON COMMIT DROP;
				CREATE INDEX _rirdata_search ON _rirdata
					USING BTREE(family(network), masklen(network));
				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
			""")

			# Parse all feeds
			for callback, url, *args in feeds:
				# Retrieve the feed
				f = self.downloader.retrieve(url)

				# Call the callback
				with self.db.pipeline():
					await callback(source, countries, f, *args)

			# Process all parsed networks from every RIR we happen to have access to,
			# insert the largest network chunks into the networks table immediately...
			families = self.db.query("""
				SELECT DISTINCT
					family(network) AS family
				FROM
					_rirdata
				ORDER BY
					family(network)
				""",
			)

			for family in (row.family for row in families):
				# Fetch the smallest mask length in our data set
				smallest = self.db.get("""
					SELECT
						MIN(
							masklen(network)
						) AS prefix
					FROM
						_rirdata
					WHERE
						family(network) = %s
					""", family,
				)

				# Copy all networks
				self.db.execute("""
					INSERT INTO
						networks
					(
						network,
						country,
						original_countries,
						source
					)
					SELECT
						network,
						country,
						original_countries,
						source
					FROM
						_rirdata
					WHERE
						masklen(network) = %s
					AND
						family(network) = %s
					ON CONFLICT DO
						NOTHING""",
					smallest.prefix,
					family,
				)

				# ... determine any other prefixes for this network family, ...
				prefixes = self.db.query("""
					SELECT
						DISTINCT masklen(network) AS prefix
					FROM
						_rirdata
					WHERE
						family(network) = %s
					ORDER BY
						masklen(network) ASC
					OFFSET 1
					""", family,
				)

				# ... and insert networks with this prefix in case they provide additional
				# information (i.e. a subnet of a larger chunk with a different country)
				for prefix in (row.prefix for row in prefixes):
					self.db.execute("""
						WITH candidates AS (
							SELECT
								_rirdata.network,
								_rirdata.country,
								_rirdata.original_countries,
								_rirdata.source
							FROM
								_rirdata
							WHERE
								family(_rirdata.network) = %s
							AND
								masklen(_rirdata.network) = %s
						),
						filtered AS (
							SELECT
								DISTINCT ON (c.network)
								c.network,
								c.country,
								c.original_countries,
								c.source,
								masklen(networks.network),
								networks.country AS parent_country
							FROM
								candidates c
							LEFT JOIN
								networks
							ON
								c.network << networks.network
							ORDER BY
								c.network,
								masklen(networks.network) DESC NULLS LAST
						)
						INSERT INTO
							networks(network, country, original_countries, source)
						SELECT
							network,
							country,
							original_countries,
							source
						FROM
							filtered
						WHERE
							parent_country IS NULL
						OR
							country <> parent_country
						ON CONFLICT DO NOTHING
						""", family, prefix,
					)

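				# For example: if _rirdata held 10.0.0.0/8 with country DE and
				# 10.1.0.0/16 also with country DE, the /16 would add no
				# information over its parent and be skipped; with a different
				# country (say AT), it would be inserted as a more specific
				# record. (Hypothetical values, for illustration only.)
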
			self.db.execute("""
				INSERT INTO
					autnums
				(
					number,
					name,
					source
				)
				SELECT
					_autnums.number,
					_organizations.name,
					_organizations.source
				FROM
					_autnums
				JOIN
					_organizations ON _autnums.organization = _organizations.handle
				ON CONFLICT
				(
					number
				)
				DO UPDATE
					SET name = excluded.name
				""",
			)

	async def _import_standard_format(self, source, countries, f, *args):
		"""
			Imports a single standard format source feed
		"""
		# Iterate over all blocks
		for block in iterate_over_blocks(f):
			self._parse_block(block, source, countries)

	async def _import_extended_format(self, source, countries, f, *args):
		# Iterate over all lines
		for line in iterate_over_lines(f):
			self._parse_line(line, source, countries)

	async def _import_arin_as_names(self, source, countries, f, *args):
		# Wrap the data to text
		f = io.TextIOWrapper(f)

		# Walk through the file
		for line in csv.DictReader(f, dialect="arin"):
			# Fetch status
			status = line.get("Status")

			# We are only interested in anything managed by ARIN
			if not status == "Full Registry Services":
				continue

			# Fetch organization name
			name = line.get("Org Name")

			# Extract ASNs
			first_asn = line.get("Start AS Number")
			last_asn = line.get("End AS Number")

			# Cast to a number (missing or malformed values raise
			# TypeError or ValueError, respectively)
			try:
				first_asn = int(first_asn)
			except (TypeError, ValueError):
				log.warning("Could not parse ASN '%s'" % first_asn)
				continue

			try:
				last_asn = int(last_asn)
			except (TypeError, ValueError):
				log.warning("Could not parse ASN '%s'" % last_asn)
				continue

			# Check if the range is valid (an invalid range leaves the
			# loop below empty, so nothing will be imported)
			if last_asn < first_asn:
				log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))

			# Insert everything into the database
			for asn in range(first_asn, last_asn + 1):
				if not self._check_parsed_asn(asn):
					log.warning("Skipping invalid ASN %s" % asn)
					continue

				self.db.execute("""
					INSERT INTO
						autnums
					(
						number,
						name,
						source
					)
					VALUES
					(
						%s, %s, %s
					)
					ON CONFLICT
					(
						number
					)
					DO NOTHING
					""", asn, name, "ARIN",
				)

	def _check_parsed_network(self, network):
		"""
			Assistive function to detect and subsequently sort out parsed
			networks from RIR data (both WHOIS and the so-called "extended
			sources"), which are or have...

			(a) not globally routable (RFC 1918 space, et al.)
			(b) covering a too large chunk of the IP address space (prefix
			    length is < 7 for IPv4 networks, and < 10 for IPv6)
			(c) "0.0.0.0" or "::" as a network address

			This unfortunately is necessary due to brain-dead clutter across
			various RIR databases, causing mismatches and eventually disruptions.

			We will return False in case a network is not suitable for adding
			it to our database, and True otherwise.
		"""
		# Check input
		if isinstance(network, ipaddress.IPv6Network):
			pass
		elif isinstance(network, ipaddress.IPv4Network):
			pass
		else:
			raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))

		# Ignore anything that isn't globally routable
		if not network.is_global:
			log.debug("Skipping non-globally routable network: %s" % network)
			return False

		# Ignore the unspecified address ranges (see RFC 5735 for IPv4 and RFC 2373 for IPv6)
		elif network.is_unspecified:
			log.debug("Skipping unspecified network: %s" % network)
			return False

		# IPv6
		if network.version == 6:
			if network.prefixlen < 10:
				log.debug("Skipping too big IP chunk: %s" % network)
				return False

		# IPv4
		elif network.version == 4:
			if network.prefixlen < 7:
				log.debug("Skipping too big IP chunk: %s" % network)
				return False

		# In case we have made it here, the network is considered to
		# be suitable for libloc consumption...
		return True

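	# A rough sketch of the expected behaviour (hypothetical inputs, for
	# illustration only):
	#
	#   _check_parsed_network(ipaddress.ip_network("10.0.0.0/8"))   # False (RFC 1918, not global)
	#   _check_parsed_network(ipaddress.ip_network("2000::/3"))     # False (IPv6 prefix < 10)
	#   _check_parsed_network(ipaddress.ip_network("8.8.8.0/24"))   # True (globally routable)
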
	def _check_parsed_asn(self, asn):
		"""
			Assistive function to filter Autonomous System Numbers not being suitable
			for adding to our database. Returns False in such cases, and True otherwise.
		"""
		for start, end in VALID_ASN_RANGES:
			if start <= asn <= end:
				return True

		log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
		return False

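	# For example, based on VALID_ASN_RANGES above: AS23456 (the AS_TRANS
	# placeholder) and AS64512 (private use) fall outside the valid ranges
	# and are rejected, while an ordinary public ASN such as AS204867 passes.
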
	def _check_geofeed_url(self, url):
		"""
			This function checks if a Geofeed URL is valid.

			If so, it returns the normalized URL which should be stored instead of
			the original one.
		"""
		# Parse the URL
		try:
			url = urllib.parse.urlparse(url)
		except ValueError as e:
			log.warning("Invalid URL %s: %s" % (url, e))
			return

		# Make sure that this is a HTTPS URL
		if not url.scheme == "https":
			log.warning("Skipping Geofeed URL that is not using HTTPS: %s" \
				% url.geturl())
			return

		# Normalize the URL and convert it back
		return url.geturl()

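	# A sketch of the expected behaviour (hypothetical URLs):
	#
	#   _check_geofeed_url("http://example.com/geofeed.csv")   # None - not HTTPS
	#   _check_geofeed_url("HTTPS://example.com/geofeed.csv")  # "https://example.com/geofeed.csv"
	#                                                          # (urlparse lowercases the scheme)
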
	def _parse_block(self, block, source_key, countries):
		# Get first line to find out what type of block this is
		line = block[0]

		# aut-num
		if line.startswith("aut-num:"):
			return self._parse_autnum_block(block, source_key)

		# inetnum
		if line.startswith("inet6num:") or line.startswith("inetnum:"):
			return self._parse_inetnum_block(block, source_key, countries)

		# organisation
		elif line.startswith("organisation:"):
			return self._parse_org_block(block, source_key)

	def _parse_autnum_block(self, block, source_key):
		autnum = {}
		for line in block:
			# Split line
			key, val = split_line(line)

			if key == "aut-num":
				m = re.match(r"^(AS|as)(\d+)", val)
				if m:
					autnum["asn"] = m.group(2)

			elif key == "org":
				autnum[key] = val.upper()

			elif key == "descr":
				# Save the first description line as well...
				if key not in autnum:
					autnum[key] = val

		# Skip empty objects
		if not autnum or "asn" not in autnum:
			return

		# Insert a dummy organisation handle into our temporary organisations
		# table in case the AS does not have an organisation handle set, but
		# has a description (a quirk often observed in the APNIC area), so we
		# can later display at least some string for this AS.
		if "org" not in autnum:
			if "descr" in autnum:
				autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")

				self.db.execute("INSERT INTO _organizations(handle, name, source) \
					VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
					autnum.get("org"), autnum.get("descr"), source_key,
				)
			else:
				log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
					autnum.get("asn"))
				return

		# Insert into database
		self.db.execute("INSERT INTO _autnums(number, organization, source) \
			VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
			organization = excluded.organization",
			autnum.get("asn"), autnum.get("org"), source_key,
		)

	def _parse_inetnum_block(self, block, source_key, countries):
		inetnum = {}
		for line in block:
			# Split line
			key, val = split_line(line)

			# Filter any inetnum records which are only referring to IP space
			# not managed by that specific RIR...
			if key == "netname":
				if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
					log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
					return

			if key == "inetnum":
				start_address, delim, end_address = val.partition("-")

				# Strip any excess space
				start_address, end_address = start_address.rstrip(), end_address.strip()

				# Handle "inetnum" formatting in the LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
				if start_address and not (delim or end_address):
					try:
						start_address = ipaddress.ip_network(start_address, strict=False)
					except ValueError:
						start_address = start_address.split("/")
						ldigits = start_address[0].count(".")

						# How many octets do we need to add?
						# (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
						if ldigits == 1:
							start_address = start_address[0] + ".0.0/" + start_address[1]
						elif ldigits == 2:
							start_address = start_address[0] + ".0/" + start_address[1]
						else:
							log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
							return

						try:
							start_address = ipaddress.ip_network(start_address, strict=False)
						except ValueError:
							log.warning("Could not parse line in LACNIC DB format: %s" % line)
							return

					# Enumerate first and last IP address of this network
					end_address = start_address[-1]
					start_address = start_address[0]

				else:
					# Convert to IP address
					try:
						start_address = ipaddress.ip_address(start_address)
						end_address = ipaddress.ip_address(end_address)
					except ValueError:
						log.warning("Could not parse line: %s" % line)
						return

				inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))

			elif key == "inet6num":
				inetnum[key] = [ipaddress.ip_network(val, strict=False)]

			elif key == "country":
				cc = val.upper()

				# Ignore certain country codes
				if cc in IGNORED_COUNTRIES:
					log.debug("Ignoring country code '%s'" % cc)
					continue

				# Translate country codes
				try:
					cc = TRANSLATED_COUNTRIES[cc]
				except KeyError:
					pass

				# Do we know this country?
				if cc not in countries:
					log.warning("Skipping invalid country code '%s'" % cc)
					continue

				try:
					inetnum[key].append(cc)
				except KeyError:
					inetnum[key] = [cc]

			# Parse the geofeed attribute
			elif key == "geofeed":
				inetnum["geofeed"] = val

			# Parse geofeed when used as a remark
			elif key == "remarks":
				m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
				if m:
					inetnum["geofeed"] = m.group(1)

		# Skip empty objects
		if not inetnum:
			return

		# Iterate through all networks enumerated from above, check them for plausibility and insert
		# them into the database, if _check_parsed_network() succeeded
		for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
			if not self._check_parsed_network(single_network):
				continue

			# Fetch the countries or use a list with an empty country
			countries = inetnum.get("country", [None])

			# Insert the network into the database but only use the first country code
			for cc in countries:
				self.db.execute("""
					INSERT INTO
						_rirdata
					(
						network,
						country,
						original_countries,
						source
					)
					VALUES
					(
						%s, %s, %s, %s
					)
					ON CONFLICT (network)
					DO UPDATE SET country = excluded.country
					""", "%s" % single_network, cc, [cc for cc in countries if cc], source_key,
				)

				# If there is more than one country, we will only use the first one
				break

			# Update any geofeed information
			geofeed = inetnum.get("geofeed", None)
			if geofeed:
				self._parse_geofeed(source_key, geofeed, single_network)

	def _parse_geofeed(self, source, url, single_network):
		# Check the URL
		url = self._check_geofeed_url(url)
		if not url:
			return

		# Store/update any geofeeds
		self.db.execute("""
			INSERT INTO
				network_geofeeds
			(
				network,
				url,
				source
			)
			VALUES
			(
				%s, %s, %s
			)
			ON CONFLICT
			(
				network, url
			)
			DO UPDATE SET
				source = excluded.source
			""", "%s" % single_network, url, source,
		)

	def _parse_org_block(self, block, source_key):
		org = {}
		for line in block:
			# Split line
			key, val = split_line(line)

			if key == "organisation":
				org[key] = val.upper()
			elif key == "org-name":
				org[key] = val

		# Skip empty objects
		if not org:
			return

		self.db.execute("INSERT INTO _organizations(handle, name, source) \
			VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
			UPDATE SET name = excluded.name",
			org.get("organisation"), org.get("org-name"), source_key,
		)

	def _parse_line(self, line, source_key, validcountries=None):
		# Skip version line
		if line.startswith("2"):
			return

		# Skip comments
		if line.startswith("#"):
			return

		try:
			registry, country_code, type, line = line.split("|", 3)
		except ValueError:
			log.warning("Could not parse line: %s" % line)
			return

		# Skip ASN
		if type == "asn":
			return

		# Skip any unknown protocols
		elif type not in ("ipv6", "ipv4"):
			log.warning("Unknown IP protocol '%s'" % type)
			return

		# Skip any lines that are for stats only or do not have a country
		# code at all (avoids log spam below)
		if not country_code or country_code == '*':
			return

		# Skip objects with unknown country codes
		if validcountries and country_code not in validcountries:
			log.warning("Skipping line with bogus country '%s': %s" % \
				(country_code, line))
			return

		try:
			address, prefix, date, status, organization = line.split("|")
		except ValueError:
			organization = None

			# Try parsing the line without organization
			try:
				address, prefix, date, status = line.split("|")
			except ValueError:
				log.warning("Unhandled line format: %s" % line)
				return

		# Skip anything that isn't properly assigned
		if status not in ("assigned", "allocated"):
			return

		# Cast prefix into an integer
		try:
			prefix = int(prefix)
		except ValueError:
			log.warning("Invalid prefix: %s" % prefix)
			return

		# Fix prefix length for IPv4
		if type == "ipv4":
			prefix = 32 - int(math.log(prefix, 2))

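		# (For example, an IPv4 line advertising 256 addresses becomes
		# 32 - log2(256) = /24, and one advertising 4096 addresses a /20.)
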
		# Try to parse the address
		try:
			network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
		except ValueError:
			log.warning("Invalid IP address: %s" % address)
			return

		if not self._check_parsed_network(network):
			return

		self.db.execute("""
			INSERT INTO
				networks
			(
				network,
				country,
				original_countries,
				source
			)
			VALUES
			(
				%s, %s, %s, %s
			)
			ON CONFLICT (network)
			DO UPDATE SET country = excluded.country
			""", "%s" % network, country_code, [country_code], source_key,
		)

	async def handle_update_announcements(self, ns):
		server = ns.server[0]

		with self.db.transaction():
			if server.startswith("/"):
				await self._handle_update_announcements_from_bird(server)

			# Purge anything we never want here
			self.db.execute("""
				-- Delete default routes
				DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

				-- Delete anything that is not global unicast address space
				DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

				-- Delete "current network" address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

				-- Delete local loopback address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

				-- Delete RFC 1918 address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

				-- Delete test, benchmark and documentation address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

				-- Delete CGNAT address space (RFC 6598)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

				-- Delete link local address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

				-- Delete IPv6 to IPv4 (6to4) address space (RFC 3068)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
				DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

				-- Delete multicast and reserved address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

				-- Delete networks that are too small to be in the global routing table
				DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
				DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

				-- Delete any non-public or reserved ASNs
				DELETE FROM announcements WHERE NOT (
					(autnum >= 1 AND autnum <= 23455)
				OR
					(autnum >= 23457 AND autnum <= 64495)
				OR
					(autnum >= 131072 AND autnum <= 4199999999)
				);

				-- Delete everything that we have not seen for 14 days
				DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
			""")

	async def _handle_update_announcements_from_bird(self, server):
		# Pre-compile the regular expression for faster searching
		route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")

		log.info("Requesting routing table from Bird (%s)" % server)

		aggregated_networks = []

		# Send command to list all routes
		for line in self._bird_cmd(server, "show route"):
			m = route.match(line)
			if not m:
				# Skip empty lines
				if not line:
					pass

				# Ignore any header lines with the name of the routing table
				elif line.startswith(b"Table"):
					pass

				# Log anything else
				else:
					log.debug("Could not parse line: %s" % line.decode())

				continue

			# Fetch the extracted network and ASN
			network, autnum = m.groups()

			# Decode into strings
			if network:
				network = network.decode()
			if autnum:
				autnum = autnum.decode()

			# Collect all aggregated networks
			if not autnum:
				log.debug("%s is an aggregated network" % network)
				aggregated_networks.append(network)
				continue

			# Insert it into the database
			self.db.execute("INSERT INTO announcements(network, autnum) \
				VALUES(%s, %s) ON CONFLICT (network) DO \
				UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
				network, autnum,
			)

		# Process any aggregated networks
		for network in aggregated_networks:
			log.debug("Processing aggregated network %s" % network)

			# Run "show route all" for each network
			for line in self._bird_cmd(server, "show route %s all" % network):
				# Try finding the path
				m = re.match(rb"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
				if m:
					# Select the last AS number in the path
					autnum = m.group(1).decode()

					# Insert it into the database
					self.db.execute("INSERT INTO announcements(network, autnum) \
						VALUES(%s, %s) ON CONFLICT (network) DO \
						UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
						network, autnum,
					)

					# We don't need to process any more
					break

	def _bird_cmd(self, socket_path, command):
		# Connect to the socket
		s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
		s.connect(socket_path)

		# Allocate some buffer
		buffer = b""

		log.debug("Sending Bird command: %s" % command)

		# Send the command
		s.send(b"%s\n" % command.encode())

		while True:
			# Fill up the buffer
			buffer += s.recv(4096)

			while True:
				# Search for the next newline
				pos = buffer.find(b"\n")

				# If we cannot find one, we go back and read more data
				if pos <= 0:
					break

				# Cut after the newline character
				pos += 1

				# Split the line we want and keep the rest in buffer
				line, buffer = buffer[:pos], buffer[pos:]

				# Try parsing any status lines: a four-digit response code
				# followed by a space (ASCII 32) or a hyphen (ASCII 45)
				if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
					code, delim, line = int(line[:4]), line[4], line[5:]

					log.debug("Received response code %s from bird" % code)

					# End of output
					if code == 0:
						return

					# Ignore hello line
					elif code == 1:
						continue

				# Otherwise return the line
				yield line

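	# A sketch of the assumed exchange on the Bird control socket (the status
	# codes shown are illustrative; only 0 and 1 are treated specially above):
	#
	#   -> show route
	#   <- 0001 BIRD 2.0 ready.       (hello, skipped)
	#   <- 1007-203.0.113.0/24 ...    (data lines, yielded to the caller)
	#   <- 0000                       (end of output, the generator returns)
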
	async def handle_update_geofeeds(self, ns):
		# Sync geofeeds
		with self.db.transaction():
			# Delete all geofeeds which are no longer linked
			self.db.execute("""
				DELETE FROM
					geofeeds
				WHERE
					geofeeds.url NOT IN (
						SELECT
							network_geofeeds.url
						FROM
							network_geofeeds
					)
				""",
			)

			# Copy all geofeeds
			self.db.execute("""
				WITH all_geofeeds AS (
					SELECT
						network_geofeeds.url
					FROM
						network_geofeeds
				)
				INSERT INTO
					geofeeds
				(
					url
				)
				SELECT
					url
				FROM
					all_geofeeds
				ON CONFLICT (url)
				DO NOTHING
				""",
			)

		# Fetch all Geofeeds that require an update
		geofeeds = self.db.query("""
			SELECT
				id,
				url
			FROM
				geofeeds
			WHERE
				updated_at IS NULL
			OR
				updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
			ORDER BY
				id
			""")

		# Limit the number of simultaneous fetches
		ratelimiter = asyncio.Semaphore(32)

		# Update all geofeeds
		async with asyncio.TaskGroup() as tasks:
			for geofeed in geofeeds:
				task = tasks.create_task(
					self._fetch_geofeed(ratelimiter, geofeed),
				)

		# Delete data from any feeds that did not update in the last two weeks
		with self.db.transaction():
			self.db.execute("""
				DELETE FROM
					geofeed_networks
				WHERE
					geofeed_networks.geofeed_id IN (
						SELECT
							geofeeds.id
						FROM
							geofeeds
						WHERE
							updated_at IS NULL
						OR
							updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
					)
			""")

	async def _fetch_geofeed(self, ratelimiter, geofeed):
		async with ratelimiter:
			log.debug("Fetching Geofeed %s" % geofeed.url)

			with self.db.transaction():
				# Open the URL
				try:
					# Send the request
					f = await asyncio.to_thread(
						self.downloader.retrieve,

						# Fetch the feed by its URL
						geofeed.url,

						# Send some extra headers
						headers={
							"User-Agent" : "location/%s" % location.__version__,

							# We expect some plain text file in CSV format
							"Accept" : "text/csv, text/plain",
						},

						# Don't wait longer than 10 seconds for a response
						timeout=10,
					)

					# Remove any previous data
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

					lineno = 0

					# Read the output line by line
					with self.db.pipeline():
						for line in f:
							lineno += 1

							try:
								line = line.decode()

							# Ignore any lines we cannot decode
							except UnicodeDecodeError:
								log.debug("Could not decode line %s in %s" \
									% (lineno, geofeed.url))
								continue

							# Strip any newline
							line = line.rstrip()

							# Skip empty lines
							if not line:
								continue

							# Skip comments
							elif line.startswith("#"):
								continue

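							# Every remaining line should be an RFC 8805 entry
							# of the form network,country,region,city (plus an
							# optional postal code), e.g. (hypothetical):
							#
							#   192.0.2.0/24,US,US-CA,Los Angeles
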
							# Try to parse the line
							try:
								fields = line.split(",", 5)
							except ValueError:
								log.debug("Could not parse line: %s" % line)
								continue

							# Check if we have enough fields
							if len(fields) < 4:
								log.debug("Not enough fields in line: %s" % line)
								continue

							# Fetch all fields
							network, country, region, city, = fields[:4]

							# Try to parse the network
							try:
								network = ipaddress.ip_network(network, strict=False)
							except ValueError:
								log.debug("Could not parse network: %s" % network)
								continue

							# Strip any excess whitespace from country codes
							country = country.strip()

							# Make the country code uppercase
							country = country.upper()

							# Check the country code
							if not country:
								log.debug("Empty country code in Geofeed %s line %s" \
									% (geofeed.url, lineno))
								continue

							elif not location.country_code_is_valid(country):
								log.debug("Invalid country code in Geofeed %s:%s: %s" \
									% (geofeed.url, lineno, country))
								continue

							# Write this into the database
							self.db.execute("""
								INSERT INTO
									geofeed_networks (
										geofeed_id,
										network,
										country,
										region,
										city
									)
								VALUES (%s, %s, %s, %s, %s)""",
								geofeed.id,
								"%s" % network,
								country,
								region,
								city,
							)

98d9079b
MT
1963 # Catch any HTTP errors
1964 except urllib.request.HTTPError as e:
1965 self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
1966 WHERE id = %s", e.code, "%s" % e, geofeed.id)
4ed16a14 1967
98d9079b
MT
1968 # Remove any previous data when the feed has been deleted
1969 if e.code == 404:
1970 self.db.execute("DELETE FROM geofeed_networks \
1971 WHERE geofeed_id = %s", geofeed.id)
72908243 1972
98d9079b
MT
1973 # Catch any other errors and connection timeouts
1974 except (http.client.InvalidURL, urllib.request.URLError, TimeoutError) as e:
1975 log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))
5e632485 1976
98d9079b
MT
1977 self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
1978 WHERE id = %s", 599, "%s" % e, geofeed.id)
0fcdd689 1979
98d9079b
MT
1980 # Mark the geofeed as updated
1981 else:
81b28603 1982 self.db.execute("""
98d9079b
MT
1983 UPDATE
1984 geofeeds
1985 SET
1986 updated_at = CURRENT_TIMESTAMP,
1987 status = NULL,
1988 error = NULL
1989 WHERE
1990 id = %s""",
81b28603 1991 geofeed.id,
81b28603 1992 )
0fcdd689 1993
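	# For reference, a Geofeed as consumed above is a CSV file in RFC 8805
	# format; an illustrative line (example values) looks like:
	#
	#   192.0.2.0/24,DE,DE-BE,Berlin,10115
	#
	# Only the first four fields (network, country, region, city) are stored;
	# anything beyond them is ignored.
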
	async def handle_update_overrides(self, ns):
		with self.db.transaction():
			# Drop any previous content
			self.db.execute("TRUNCATE TABLE autnum_overrides")
			self.db.execute("TRUNCATE TABLE network_overrides")

			# Remove all Geofeeds
			self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", "overrides")

			for file in ns.files:
				log.info("Reading %s..." % file)

				with open(file, "rb") as f:
					for type, block in read_blocks(f):
						if type == "net":
							network = block.get("net")

							# Try to parse and normalise the network
							try:
								network = ipaddress.ip_network(network, strict=False)
							except ValueError as e:
								log.warning("Invalid IP network: %s: %s" % (network, e))
								continue

							# Prevent overwriting all networks
							if network.prefixlen == 0:
								log.warning("Skipping %s: You cannot overwrite the default" % network)
								continue

							self.db.execute("""
								INSERT INTO
									network_overrides
								(
									network,
									country,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								)
								VALUES
								(
									%s, %s, %s, %s, %s, %s
								)
								ON CONFLICT (network) DO NOTHING
								""",
								"%s" % network,
								block.get("country"),
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						elif type == "aut-num":
							autnum = block.get("aut-num")

							# Check if the AS number begins with "AS"
							if not autnum.startswith("AS"):
								log.warning("Invalid AS number: %s" % autnum)
								continue

							# Strip "AS"
							autnum = autnum[2:]

							self.db.execute("""
								INSERT INTO
									autnum_overrides
								(
									number,
									name,
									country,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								)
								VALUES
								(
									%s, %s, %s, %s, %s, %s, %s
								)
								ON CONFLICT (number) DO NOTHING
								""",
								autnum,
								block.get("name"),
								block.get("country"),
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						# Geofeeds
						elif type == "geofeed":
							networks = []

							# Fetch the URL
							url = block.get("geofeed")

							# Fetch permitted networks
							for n in block.get("network", []):
								try:
									n = ipaddress.ip_network(n)
								except ValueError as e:
									log.warning("Ignoring invalid network %s: %s" % (n, e))
									continue

								networks.append(n)

							# If no networks have been specified, permit everything
							if not networks:
								networks = [
									ipaddress.ip_network("::/0"),
									ipaddress.ip_network("0.0.0.0/0"),
								]

							# Check the URL
							url = self._check_geofeed_url(url)
							if not url:
								continue

							# Store the Geofeed URL
							self.db.execute("""
								INSERT INTO
									geofeeds
								(
									url
								)
								VALUES
								(
									%s
								)
								ON CONFLICT (url) DO NOTHING
								""", url,
							)

							# Store all permitted networks
							self.db.executemany("""
								INSERT INTO
									network_geofeeds
								(
									network,
									url,
									source
								)
								VALUES
								(
									%s, %s, %s
								)
								ON CONFLICT
								(
									network, url
								)
								DO UPDATE SET
									source = excluded.source
								""", (("%s" % n, url, "overrides") for n in networks),
							)

						else:
							log.warning("Unsupported type: %s" % type)

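	# An illustrative overrides block as consumed above (values are examples
	# only; the block format is defined by read_blocks() further down):
	#
	#   aut-num:              AS64496
	#   name:                 Example Network
	#   country:              DE
	#   is-anonymous-proxy:   no
	#   drop:                 yes
	#
	# Blocks are separated by empty lines, and the key of the first line
	# ("net", "aut-num" or "geofeed") selects the handler.
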
	async def handle_update_feeds(self, ns):
		"""
			Update any third-party feeds
		"""
		success = True

		feeds = (
			# AWS IP Ranges
			("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),

			# Spamhaus DROP
			("SPAMHAUS-DROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
			("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),

			# Spamhaus ASNDROP
			("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
		)

		# Drop any data from feeds that we don't support (any more)
		with self.db.transaction():
			# Fetch the names of all feeds we support
			sources = [name for name, *rest in feeds]

			self.db.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources)
			self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)

		# Walk through all feeds
		for name, callback, url, *args in feeds:
			# Skip any feeds that were not requested on the command line
			if ns.feeds and name not in ns.feeds:
				continue

			try:
				await self._process_feed(name, callback, url, *args)

			# Log an error but continue if an exception occurs
			except Exception as e:
				log.error("Error processing feed '%s': %s" % (name, e))
				success = False

		# Return status
		return 0 if success else 1

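	# Assuming the subcommand is wired up with an optional list of feed names
	# (as ns.feeds suggests), individual feeds can be refreshed selectively,
	# with something along the lines of:
	#
	#   location-importer ... update-feeds --feeds SPAMHAUS-DROP SPAMHAUS-DROPV6
	#
	# Any feed not named on the command line is skipped by the check above.
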
	async def _process_feed(self, name, callback, url, *args):
		"""
			Processes one feed
		"""
		# Open the URL
		f = self.downloader.retrieve(url)

		with self.db.transaction():
			# Drop any previous content
			self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
			self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)

			# Call the callback to process the feed
			with self.db.pipeline():
				return await callback(name, f, *args)

	async def _import_aws_ip_ranges(self, name, f):
		# Parse the feed
		feed = json.load(f)

		# Set up a dictionary for mapping a region name to a country. Unfortunately,
		# there seems to be no machine-readable version available of this other than
		# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
		# (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
		# was helpful here as well.
		aws_region_country_map = {
			# Africa
			"af-south-1" : "ZA",

			# Asia
			"il-central-1" : "IL", # Tel Aviv

			# Asia/Pacific
			"ap-northeast-1" : "JP",
			"ap-northeast-2" : "KR",
			"ap-northeast-3" : "JP",
			"ap-east-1" : "HK",
			"ap-south-1" : "IN",
			"ap-south-2" : "IN",
			"ap-southeast-1" : "SG",
			"ap-southeast-2" : "AU",
			"ap-southeast-3" : "MY",
			"ap-southeast-4" : "AU",
			"ap-southeast-5" : "NZ", # Auckland, NZ
			"ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere

			# Canada
			"ca-central-1" : "CA",
			"ca-west-1" : "CA",

			# Europe
			"eu-central-1" : "DE",
			"eu-central-2" : "CH",
			"eu-north-1" : "SE",
			"eu-west-1" : "IE",
			"eu-west-2" : "GB",
			"eu-west-3" : "FR",
			"eu-south-1" : "IT",
			"eu-south-2" : "ES",

			# Middle East
			"me-central-1" : "AE",
			"me-south-1" : "BH",

			# South America
			"sa-east-1" : "BR",

			# Undocumented, likely located in Berlin rather than Frankfurt
			"eusc-de-east-1" : "DE",
		}

		# Collect a list of all networks
		prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])

		for prefix in prefixes:
			# Fetch the network
			network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")

			# Parse the network
			try:
				network = ipaddress.ip_network(network)
			except ValueError as e:
				log.warning("%s: Unable to parse prefix %s: %s" % (name, network, e))
				continue

			# Sanitize parsed networks...
			if not self._check_parsed_network(network):
				continue

			# Fetch the region
			region = prefix.get("region")

			# Set some defaults
			cc = None
			is_anycast = False

			# Fetch the CC from the dictionary
			try:
				cc = aws_region_country_map[region]

			# If we couldn't find anything, let's try something else...
			except KeyError as e:
				# Find anycast networks
				if region == "GLOBAL":
					is_anycast = True

				# Everything that starts with us- is probably in the United States
				elif region.startswith("us-"):
					cc = "US"

				# Everything that starts with cn- is probably China
				elif region.startswith("cn-"):
					cc = "CN"

				# Log a warning for anything else
				else:
					log.warning("%s: Could not determine country code for AWS region %s" \
						% (name, region))
					continue

			# Write to database
			self.db.execute("""
				INSERT INTO
					network_feeds
				(
					network,
					source,
					country,
					is_anycast
				)
				VALUES
				(
					%s, %s, %s, %s
				)
				ON CONFLICT (network, source) DO NOTHING
				""", "%s" % network, name, cc, is_anycast,
			)

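	# For reference, ip-ranges.json is a JSON document of roughly this shape
	# (abbreviated to the fields read above; prefix values are illustrative):
	#
	#   {
	#     "prefixes"      : [{ "ip_prefix"   : "192.0.2.0/24",  "region" : "eu-central-1", ... }],
	#     "ipv6_prefixes" : [{ "ipv6_prefix" : "2001:db8::/32", "region" : "GLOBAL", ... }]
	#   }
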
	async def _import_spamhaus_drop(self, name, f):
		"""
			Import Spamhaus DROP IP feeds
		"""
		# Count all lines
		lines = 0

		# Walk through all lines
		for line in f:
			# Decode line
			line = line.decode("utf-8")

			# Strip off any comments
			line, _, comment = line.partition(";")

			# Ignore empty lines
			if not line:
				continue

			# Strip any excess whitespace
			line = line.strip()

			# Increment line counter
			lines += 1

			# Parse the network
			try:
				network = ipaddress.ip_network(line)
			except ValueError as e:
				log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
				continue

			# Check network
			if not self._check_parsed_network(network):
				log.warning("%s: Skipping bogus network: %s" % (name, network))
				continue

			# Insert into the database
			self.db.execute("""
				INSERT INTO
					network_feeds
				(
					network,
					source,
					is_drop
				)
				VALUES
				(
					%s, %s, %s
				)""", "%s" % network, name, True,
			)

		# Raise an exception if we could not import anything
		if not lines:
			raise RuntimeError("Received bogus feed %s with no data" % name)

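	# A DROP feed line carries the network first, optionally followed by a
	# comment after a semicolon, e.g. (illustrative): 192.0.2.0/24 ; SBL123456
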
	async def _import_spamhaus_asndrop(self, name, f):
		"""
			Import Spamhaus ASNDROP feed
		"""
		for line in f:
			# Decode the line
			line = line.decode("utf-8")

			# Parse JSON
			try:
				line = json.loads(line)
			except json.JSONDecodeError as e:
				log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
				continue

			# Fetch type
			type = line.get("type")

			# Skip any metadata
			if type == "metadata":
				continue

			# Fetch ASN
			asn = line.get("asn")

			# Skip any lines without an ASN
			if not asn:
				continue

			# Filter invalid ASNs
			if not self._check_parsed_asn(asn):
				log.warning("%s: Skipping bogus ASN %s" % (name, asn))
				continue

			# Write to database
			self.db.execute("""
				INSERT INTO
					autnum_feeds
				(
					number,
					source,
					is_drop
				)
				VALUES
				(
					%s, %s, %s
				)""", "%s" % asn, name, True,
			)

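	# ASNDROP is newline-delimited JSON; an illustrative record (only "type"
	# and "asn" are consumed above, the remaining fields are assumptions):
	#
	#   {"asn": 64496, "rir": "ripe", "domain": "example.com"}
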
	@staticmethod
	def _parse_bool(block, key):
		val = block.get(key)

		# There is no point in proceeding when we got None
		if val is None:
			return

		# Convert to lowercase
		val = val.lower()

		# True
		if val in ("yes", "1"):
			return True

		# False
		if val in ("no", "0"):
			return False

		# Default to None
		return None

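	# Illustrative behaviour of _parse_bool():
	#
	#   _parse_bool({"drop" : "yes"}, "drop") -> True
	#   _parse_bool({"drop" : "0"}, "drop")   -> False
	#   _parse_bool({}, "drop")               -> None
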
	async def handle_import_countries(self, ns):
		with self.db.transaction():
			# Drop all data that we have
			self.db.execute("TRUNCATE TABLE countries")

			for file in ns.file:
				for line in file:
					line = line.rstrip()

					# Ignore any comments
					if line.startswith("#"):
						continue

					try:
						country_code, continent_code, name = line.split(maxsplit=2)
					except ValueError:
						log.warning("Could not parse line: %s" % line)
						continue

					self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
						VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)


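# Countries are imported from a simple text file with one country per line:
# country code, continent code and name, separated by whitespace, e.g.
# (illustrative):
#
#   DE  EU  Germany
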
def split_line(line):
	key, colon, val = line.partition(":")

	# Strip any excess space
	key = key.strip()
	val = val.strip()

	return key, val

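# Illustrative: split_line("country: DE") returns ("country", "DE"); a line
# without a colon yields the entire line as the key and "" as the value.
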
def read_blocks(f):
	for block in iterate_over_blocks(f):
		type = None
		data = {}

		for i, line in enumerate(block):
			key, value = line.split(":", 1)

			# The key of the first line defines the type
			if i == 0:
				type = key

			# Strip any excess whitespace
			value = value.strip()

			# Store some values as a list
			if type == "geofeed" and key == "network":
				try:
					data[key].append(value)
				except KeyError:
					data[key] = [value]

			# Otherwise store the value as string
			else:
				data[key] = value

		yield type, data

def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
	block = []

	for line in f:
		# Skip commented lines
		if line.startswith(b"#") or line.startswith(b"%"):
			continue

		# Convert to string
		for charset in charsets:
			try:
				line = line.decode(charset)
			except UnicodeDecodeError:
				continue
			else:
				break

		# Remove any comments at the end of line
		line, hash, comment = line.partition("#")

		# Strip any whitespace at the end of the line
		line = line.rstrip()

		# If we cut off some comment and the line is empty, we can skip it
		if comment and not line:
			continue

		# If the line has some content, keep collecting it
		if line:
			block.append(line)
			continue

		# End the block on an empty line
		if block:
			yield block

		# Reset the block
		block = []

	# Return the last block
	if block:
		yield block

def iterate_over_lines(f):
	for line in f:
		# Decode the line
		line = line.decode()

		# Strip the ending
		yield line.rstrip()

async def main():
	# Run the command line interface
	c = CLI()

	await c.run()

asyncio.run(main())