]> git.ipfire.org Git - people/ms/libloc.git/blame - src/scripts/location-importer.in
importer: Remove a debugging line
[people/ms/libloc.git] / src / scripts / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
5c7cfeb2 6# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
98d9079b 21import asyncio
f4f3d8ad 22import csv
81b28603 23import functools
a6fedd9f 24import http.client
fa5dccf1 25import io
6ffd06b5 26import ipaddress
dcef2ba4 27import json
78ff0cf2 28import logging
6ffd06b5
MT
29import math
30import re
22d8d199 31import socket
78ff0cf2 32import sys
3ce28dea 33import urllib.error
78ff0cf2
MT
34
35# Load our location module
36import location
29c6fa22 37import location.database
335946c0 38from location.downloader import Downloader
78ff0cf2
MT
39from location.i18n import _
40
41# Initialise logging
42log = logging.getLogger("location.importer")
43log.propagate = 1
44
43fe570c
PM
# Define constants

# Ranges of ASNs that are considered publicly routable; anything outside
# these ranges is rejected by CLI._check_parsed_asn().
VALID_ASN_RANGES = (
	(1, 23455),
	(23457, 64495),
	(131072, 4199999999),
)

# Country codes that are commonly used but must be mapped to their
# official ISO 3166-1 alpha-2 equivalent before being stored.
TRANSLATED_COUNTRIES = {
	# When people say UK, they mean GB
	"UK" : "GB",
}

# Country codes that are dropped entirely when parsing RIR data.
IGNORED_COUNTRIES = set((
	# Formerly Yugoslavia
	"YU",

	# Some people use ZZ to say "no country" or to hide the country
	"ZZ",
))

# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
43fe570c 67
78ff0cf2
MT
68class CLI(object):
69 def parse_cli(self):
70 parser = argparse.ArgumentParser(
71 description=_("Location Importer Command Line Interface"),
72 )
6ffd06b5 73 subparsers = parser.add_subparsers()
78ff0cf2
MT
74
75 # Global configuration flags
76 parser.add_argument("--debug", action="store_true",
77 help=_("Enable debug output"))
bc1f5f53
MT
78 parser.add_argument("--quiet", action="store_true",
79 help=_("Enable quiet mode"))
78ff0cf2
MT
80
81 # version
82 parser.add_argument("--version", action="version",
83 version="%(prog)s @VERSION@")
84
29c6fa22
MT
85 # Database
86 parser.add_argument("--database-host", required=True,
87 help=_("Database Hostname"), metavar=_("HOST"))
88 parser.add_argument("--database-name", required=True,
89 help=_("Database Name"), metavar=_("NAME"))
90 parser.add_argument("--database-username", required=True,
91 help=_("Database Username"), metavar=_("USERNAME"))
92 parser.add_argument("--database-password", required=True,
93 help=_("Database Password"), metavar=_("PASSWORD"))
94
0983f3dd
MT
95 # Write Database
96 write = subparsers.add_parser("write", help=_("Write database to file"))
97 write.set_defaults(func=self.handle_write)
98 write.add_argument("file", nargs=1, help=_("Database File"))
99 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
1164d876 100 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
0983f3dd
MT
101 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
102 write.add_argument("--description", nargs="?", help=_("Sets a description"))
103 write.add_argument("--license", nargs="?", help=_("Sets the license"))
b904896a 104 write.add_argument("--version", type=int, help=_("Database Format Version"))
0983f3dd 105
6ffd06b5
MT
106 # Update WHOIS
107 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
d77d05d1
MT
108 update_whois.add_argument("sources", nargs="*",
109 help=_("Only update these sources"))
6ffd06b5
MT
110 update_whois.set_defaults(func=self.handle_update_whois)
111
83d61c46
MT
112 # Update announcements
113 update_announcements = subparsers.add_parser("update-announcements",
114 help=_("Update BGP Annoucements"))
115 update_announcements.set_defaults(func=self.handle_update_announcements)
116 update_announcements.add_argument("server", nargs=1,
117 help=_("Route Server to connect to"), metavar=_("SERVER"))
118
0fcdd689
MT
119 # Update geofeeds
120 update_geofeeds = subparsers.add_parser("update-geofeeds",
121 help=_("Update Geofeeds"))
122 update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
123
93aad7f7
MT
124 # Update feeds
125 update_feeds = subparsers.add_parser("update-feeds",
126 help=_("Update Feeds"))
f7f8e714
MT
127 update_feeds.add_argument("feeds", nargs="*",
128 help=_("Only update these feeds"))
93aad7f7
MT
129 update_feeds.set_defaults(func=self.handle_update_feeds)
130
d7fc3057
MT
131 # Update overrides
132 update_overrides = subparsers.add_parser("update-overrides",
133 help=_("Update overrides"),
134 )
135 update_overrides.add_argument(
136 "files", nargs="+", help=_("Files to import"),
137 )
138 update_overrides.set_defaults(func=self.handle_update_overrides)
139
8084b33a
MT
140 # Import countries
141 import_countries = subparsers.add_parser("import-countries",
142 help=_("Import countries"),
143 )
144 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
145 help=_("File to import"))
146 import_countries.set_defaults(func=self.handle_import_countries)
147
78ff0cf2
MT
148 args = parser.parse_args()
149
bc1f5f53 150 # Configure logging
78ff0cf2 151 if args.debug:
f9de5e61 152 location.logger.set_level(logging.DEBUG)
bc1f5f53
MT
153 elif args.quiet:
154 location.logger.set_level(logging.WARNING)
78ff0cf2 155
6ffd06b5
MT
156 # Print usage if no action was given
157 if not "func" in args:
158 parser.print_usage()
159 sys.exit(2)
160
78ff0cf2
MT
161 return args
162
98d9079b 163 async def run(self):
78ff0cf2
MT
164 # Parse command line arguments
165 args = self.parse_cli()
166
335946c0
MT
167 # Initialize the downloader
168 self.downloader = Downloader()
169
29c6fa22 170 # Initialise database
6ffd06b5 171 self.db = self._setup_database(args)
29c6fa22 172
78ff0cf2 173 # Call function
98d9079b 174 ret = await args.func(args)
78ff0cf2
MT
175
176 # Return with exit code
177 if ret:
178 sys.exit(ret)
179
180 # Otherwise just exit
181 sys.exit(0)
182
29c6fa22
MT
	def _setup_database(self, ns):
		"""
		Initialise the database

		Connects using the credentials from the parsed command line
		namespace and (idempotently) creates/migrates the schema —
		every statement uses IF NOT EXISTS / ADD COLUMN IF NOT EXISTS,
		so this is safe to run on every start.
		"""
		# Connect to database
		db = location.database.Connection(
			host=ns.database_host, database=ns.database_name,
			user=ns.database_username, password=ns.database_password,
		)

		# Run the whole schema setup in one transaction
		with db.transaction():
			db.execute("""
				-- announcements
				CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
					first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
					last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
				CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
				CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
				CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

				-- autnums
				CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
				ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

				-- countries
				CREATE TABLE IF NOT EXISTS countries(
					country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

				-- networks
				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

				-- geofeeds
				CREATE TABLE IF NOT EXISTS geofeeds(
					id serial primary key,
					url text,
					status integer default null,
					updated_at timestamp without time zone default null
				);
				ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
				CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
					ON geofeeds(url);
				CREATE TABLE IF NOT EXISTS geofeed_networks(
					geofeed_id integer references geofeeds(id) on delete cascade,
					network inet,
					country text,
					region text,
					city text
				);
				CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
					ON geofeed_networks(geofeed_id);
				CREATE INDEX IF NOT EXISTS geofeed_networks_search
					ON geofeed_networks USING GIST(network inet_ops);
				CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
				ALTER TABLE network_geofeeds ADD COLUMN IF NOT EXISTS source text NOT NULL;
				CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
					ON network_geofeeds(network);
				CREATE INDEX IF NOT EXISTS network_geofeeds_search
					ON network_geofeeds USING GIST(network inet_ops);
				CREATE INDEX IF NOT EXISTS network_geofeeds_url
					ON network_geofeeds(url);

				-- feeds
				CREATE TABLE IF NOT EXISTS autnum_feeds(
					number bigint NOT NULL,
					source text NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
					ON autnum_feeds(number, source);

				CREATE TABLE IF NOT EXISTS network_feeds(
					network inet NOT NULL,
					source text NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
					ON network_feeds(network, source);
				CREATE INDEX IF NOT EXISTS network_feeds_search
					ON network_feeds USING GIST(network inet_ops);

				-- overrides
				CREATE TABLE IF NOT EXISTS autnum_overrides(
					number bigint NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
					ON autnum_overrides(number);
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
				ALTER TABLE autnum_overrides DROP COLUMN IF EXISTS source;

				CREATE TABLE IF NOT EXISTS network_overrides(
					network inet NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
					ON network_overrides(network);
				CREATE INDEX IF NOT EXISTS network_overrides_search
					ON network_overrides USING GIST(network inet_ops);
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
				ALTER TABLE network_overrides DROP COLUMN IF EXISTS source;

				CREATE TABLE IF NOT EXISTS geofeed_overrides(
					url text NOT NULL
				);
				CREATE UNIQUE INDEX IF NOT EXISTS geofeed_overrides_url
					ON geofeed_overrides(url);
			""")

		return db
315
e61a9352
MT
316 def fetch_countries(self):
317 """
318 Returns a list of all countries on the list
319 """
320 # Fetch all valid country codes to check parsed networks aganist...
321 countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
322
ae20698a 323 return set((country.country_code for country in countries))
e61a9352 324
	async def handle_write(self, ns):
		"""
		Compiles a database in libloc format out of what is in the database

		Writes Autonomous Systems, networks (merged from announcements,
		RIR data, feeds, geofeeds and overrides) and countries, then
		serialises everything to the file(s) given on the command line.
		"""
		# Allocate a writer
		writer = location.Writer(ns.signing_key, ns.backup_signing_key)

		# Set all metadata
		if ns.vendor:
			writer.vendor = ns.vendor

		if ns.description:
			writer.description = ns.description

		if ns.license:
			writer.license = ns.license

		# Add all Autonomous Systems
		log.info("Writing Autonomous Systems...")

		# Select all ASes with a name (overrides win over imported names)
		rows = self.db.query("""
			SELECT
				autnums.number AS number,
				COALESCE(
					overrides.name,
					autnums.name
				) AS name
			FROM
				autnums
			LEFT JOIN
				autnum_overrides overrides ON autnums.number = overrides.number
			ORDER BY
				autnums.number
			""")

		for row in rows:
			# Skip AS without names
			if not row.name:
				continue

			a = writer.add_as(row.number)
			a.name = row.name

		# Add all networks
		log.info("Writing networks...")

		# Select all known networks.
		# The COALESCE chains below define the precedence for each
		# attribute: network overrides, then (for country) AS overrides,
		# then network feeds, AS feeds, geofeeds and finally the plain
		# RIR data; flags default to FALSE.
		rows = self.db.query("""
			WITH known_networks AS (
				SELECT network FROM announcements
				UNION
				SELECT network FROM networks
				UNION
				SELECT network FROM network_feeds
				UNION
				SELECT network FROM network_overrides
				UNION
				SELECT network FROM geofeed_networks
			),

			ordered_networks AS (
				SELECT
					known_networks.network AS network,
					announcements.autnum AS autnum,
					networks.country AS country,

					-- Must be part of returned values for ORDER BY clause
					masklen(announcements.network) AS sort_a,
					masklen(networks.network) AS sort_b
				FROM
					known_networks
				LEFT JOIN
					announcements ON known_networks.network <<= announcements.network
				LEFT JOIN
					networks ON known_networks.network <<= networks.network
				ORDER BY
					known_networks.network,
					sort_a DESC,
					sort_b DESC
			)

			-- Return a list of those networks enriched with all
			-- other information that we store in the database
			SELECT
				DISTINCT ON (network)
				network,
				autnum,

				-- Country
				COALESCE(
					(
						SELECT
							country
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							country
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					(
						SELECT
							country
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							country
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							geofeed_networks.country AS country
						FROM
							network_geofeeds

						-- Join the data from the geofeeds
						LEFT JOIN
							geofeeds ON network_geofeeds.url = geofeeds.url
						LEFT JOIN
							geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id

						-- Check whether we have a geofeed for this network
						WHERE
							networks.network <<= network_geofeeds.network
						AND
							networks.network <<= geofeed_networks.network

						-- Filter for the best result
						ORDER BY
							masklen(geofeed_networks.network) DESC
						LIMIT 1
					),
					networks.country
				) AS country,

				-- Flags
				COALESCE(
					(
						SELECT
							is_anonymous_proxy
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anonymous_proxy
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anonymous_proxy
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_anonymous_proxy
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_anonymous_proxy,
				COALESCE(
					(
						SELECT
							is_satellite_provider
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_satellite_provider
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_satellite_provider
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_satellite_provider
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_satellite_provider,
				COALESCE(
					(
						SELECT
							is_anycast
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anycast
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anycast
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_anycast
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_anycast,
				COALESCE(
					(
						SELECT
							is_drop
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_drop
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_drop
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_drop
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_drop
			FROM
				ordered_networks networks
			""")

		for row in rows:
			network = writer.add_network(row.network)

			# Save country
			if row.country:
				network.country_code = row.country

			# Save ASN
			if row.autnum:
				network.asn = row.autnum

			# Set flags
			if row.is_anonymous_proxy:
				network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

			if row.is_satellite_provider:
				network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

			if row.is_anycast:
				network.set_flag(location.NETWORK_FLAG_ANYCAST)

			if row.is_drop:
				network.set_flag(location.NETWORK_FLAG_DROP)

		# Add all countries
		log.info("Writing countries...")
		rows = self.db.query("SELECT * FROM countries ORDER BY country_code")

		for row in rows:
			c = writer.add_country(row.country_code)
			c.continent_code = row.continent_code
			c.name = row.name

		# Write everything to file
		log.info("Writing database to file...")
		for file in ns.file:
			writer.write(file)
701
98d9079b 702 async def handle_update_whois(self, ns):
3ce28dea 703 # Did we run successfully?
8c2dbbc2
MT
704 success = True
705
706 sources = (
707 # African Network Information Centre
708 ("AFRINIC", (
709 (self._import_standard_format, "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"),
710 )),
711
712 # Asia Pacific Network Information Centre
713 ("APNIC", (
714 (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz"),
715 (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz"),
716 (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz"),
717 (self._import_standard_format, "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"),
718 )),
719
720 # American Registry for Internet Numbers
721 ("ARIN", (
722 (self._import_extended_format, "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"),
723 (self._import_arin_as_names, "https://ftp.arin.net/pub/resource_registry_service/asns.csv"),
724 )),
725
726 # Japan Network Information Center
727 ("JPNIC", (
728 (self._import_standard_format, "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz"),
729 )),
730
731 # Latin America and Caribbean Network Information Centre
732 ("LACNIC", (
733 (self._import_standard_format, "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz"),
734 (self._import_extended_format, "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"),
735 )),
736
737 # Réseaux IP Européens
738 ("RIPE", (
739 (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"),
740 (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz"),
741 (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz"),
742 (self._import_standard_format, "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"),
743 )),
744 )
84b175e2 745
fa9220b0 746 # Fetch all valid country codes to check parsed networks against
d4697973
MT
747 countries = self.fetch_countries()
748
749 # Check if we have countries
750 if not countries:
751 log.error("Please import countries before importing any WHOIS data")
752 return 1
6ffd06b5 753
3ce28dea 754 # Iterate over all potential sources
8c2dbbc2 755 for name, feeds in sources:
d77d05d1 756 # Skip anything that should not be updated
8c2dbbc2 757 if ns.sources and not name in ns.sources:
d77d05d1
MT
758 continue
759
8c2dbbc2 760 try:
98d9079b 761 await self._process_source(name, feeds, countries)
002deb6b 762
8c2dbbc2
MT
763 # Log an error but continue if an exception occurs
764 except Exception as e:
765 log.error("Error processing source %s" % name, exc_info=True)
766 success = False
767
768 # Return a non-zero exit code for errors
769 return 0 if success else 1
770
	async def _process_source(self, source, feeds, countries):
		"""
		This function processes one source

		All work happens inside a single transaction: previously
		imported rows for this source are deleted, all feeds are parsed
		into temporary tables, and the parsed data is then merged into
		the persistent networks/autnums tables.
		"""
		# Wrap everything into one large transaction
		with self.db.transaction():
			# Remove all previously imported content
			self.db.execute("DELETE FROM autnums WHERE source = %s", source)
			self.db.execute("DELETE FROM networks WHERE source = %s", source)
			self.db.execute("DELETE FROM network_geofeeds WHERE source = %s", source)

			# Create some temporary tables to store parsed data
			# (ON COMMIT DROP ties their lifetime to this transaction)
			self.db.execute("""
				CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
					organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
				CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

				CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
					name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
				CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

				CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
					original_countries text[] NOT NULL, source text NOT NULL)
					ON COMMIT DROP;
				CREATE INDEX _rirdata_search ON _rirdata
					USING BTREE(family(network), masklen(network));
				CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
			""")

			# Parse all feeds
			for callback, url, *args in feeds:
				# Retrieve the feed
				f = self.downloader.retrieve(url)

				# Call the callback
				with self.db.pipeline():
					await callback(source, countries, f, *args)

			# Process all parsed networks from every RIR we happen to have access to,
			# insert the largest network chunks into the networks table immediately...
			families = self.db.query("""
				SELECT DISTINCT
					family(network) AS family
				FROM
					_rirdata
				ORDER BY
					family(network)
				""",
			)

			for family in (row.family for row in families):
				# Fetch the smallest mask length in our data set
				smallest = self.db.get("""
					SELECT
						MIN(
							masklen(network)
						) AS prefix
					FROM
						_rirdata
					WHERE
						family(network) = %s
					""", family,
				)

				# Copy all networks
				self.db.execute("""
					INSERT INTO
						networks
					(
						network,
						country,
						original_countries,
						source
					)
					SELECT
						network,
						country,
						original_countries,
						source
					FROM
						_rirdata
					WHERE
						masklen(network) = %s
					AND
						family(network) = %s
					ON CONFLICT DO
						NOTHING""",
					smallest.prefix,
					family,
				)

				# ... determine any other prefixes for this network family, ...
				prefixes = self.db.query("""
					SELECT
						DISTINCT masklen(network) AS prefix
					FROM
						_rirdata
					WHERE
						family(network) = %s
					ORDER BY
						masklen(network) ASC
					OFFSET 1
					""", family,
				)

				# ... and insert networks with this prefix in case they provide additional
				# information (i. e. subnet of a larger chunk with a different country)
				for prefix in (row.prefix for row in prefixes):
					self.db.execute("""
						WITH candidates AS (
							SELECT
								_rirdata.network,
								_rirdata.country,
								_rirdata.original_countries,
								_rirdata.source
							FROM
								_rirdata
							WHERE
								family(_rirdata.network) = %s
							AND
								masklen(_rirdata.network) = %s
						),
						filtered AS (
							SELECT
								DISTINCT ON (c.network)
								c.network,
								c.country,
								c.original_countries,
								c.source,
								masklen(networks.network),
								networks.country AS parent_country
							FROM
								candidates c
							LEFT JOIN
								networks
							ON
								c.network << networks.network
							ORDER BY
								c.network,
								masklen(networks.network) DESC NULLS LAST
						)
						INSERT INTO
							networks(network, country, original_countries, source)
						SELECT
							network,
							country,
							original_countries,
							source
						FROM
							filtered
						WHERE
							parent_country IS NULL
						OR
							country <> parent_country
						ON CONFLICT DO NOTHING
						""", family, prefix,
					)

			# Merge the parsed AS numbers with their organisation names
			self.db.execute("""
				INSERT INTO
					autnums
				(
					number,
					name,
					source
				)
				SELECT
					_autnums.number,
					_organizations.name,
					_organizations.source
				FROM
					_autnums
				JOIN
					_organizations ON _autnums.organization = _organizations.handle
				ON CONFLICT
				(
					number
				)
				DO UPDATE
					SET name = excluded.name
				""",
			)
3ce28dea 953
98d9079b 954 async def _import_standard_format(self, source, countries, f, *args):
8c2dbbc2
MT
955 """
956 Imports a single standard format source feed
957 """
958 # Iterate over all blocks
8c3529c7 959 for block in iterate_over_blocks(f):
8c2dbbc2
MT
960 self._parse_block(block, source, countries)
961
98d9079b 962 async def _import_extended_format(self, source, countries, f, *args):
8c2dbbc2 963 # Iterate over all lines
8c3529c7 964 for line in iterate_over_lines(f):
c82e2eab 965 self._parse_line(line, source, countries)
8c2dbbc2 966
98d9079b 967 async def _import_arin_as_names(self, source, countries, f, *args):
fa5dccf1
MT
968 # Wrap the data to text
969 f = io.TextIOWrapper(f)
970
8c2dbbc2 971 # Walk through the file
932a7552 972 for line in csv.DictReader(f, dialect="arin"):
8c2dbbc2
MT
973 # Fetch status
974 status = line.get("Status")
975
976 # We are only interested in anything managed by ARIN
977 if not status == "Full Registry Services":
978 continue
979
980 # Fetch organization name
981 name = line.get("Org Name")
982
983 # Extract ASNs
984 first_asn = line.get("Start AS Number")
985 last_asn = line.get("End AS Number")
986
987 # Cast to a number
988 try:
989 first_asn = int(first_asn)
990 except TypeError as e:
991 log.warning("Could not parse ASN '%s'" % first_asn)
992 continue
993
994 try:
995 last_asn = int(last_asn)
996 except TypeError as e:
997 log.warning("Could not parse ASN '%s'" % last_asn)
998 continue
999
1000 # Check if the range is valid
1001 if last_asn < first_asn:
1002 log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
1003
1004 # Insert everything into the database
1005 for asn in range(first_asn, last_asn + 1):
1006 if not self._check_parsed_asn(asn):
1007 log.warning("Skipping invalid ASN %s" % asn)
1008 continue
0365119d 1009
3ce28dea 1010 self.db.execute("""
fa9220b0
MT
1011 INSERT INTO
1012 autnums
1013 (
1014 number,
1015 name,
1016 source
1017 )
8c2dbbc2
MT
1018 VALUES
1019 (
1020 %s, %s, %s
1021 )
fa9220b0
MT
1022 ON CONFLICT
1023 (
1024 number
1025 )
8c2dbbc2
MT
1026 DO NOTHING
1027 """, asn, name, "ARIN",
fa9220b0 1028 )
429a43d1 1029
bd341642
PM
1030 def _check_parsed_network(self, network):
1031 """
1032 Assistive function to detect and subsequently sort out parsed
1033 networks from RIR data (both Whois and so-called "extended sources"),
1034 which are or have...
1035
1036 (a) not globally routable (RFC 1918 space, et al.)
1037 (b) covering a too large chunk of the IP address space (prefix length
1038 is < 7 for IPv4 networks, and < 10 for IPv6)
1039 (c) "0.0.0.0" or "::" as a network address
bd341642
PM
1040
1041 This unfortunately is necessary due to brain-dead clutter across
1042 various RIR databases, causing mismatches and eventually disruptions.
1043
1044 We will return False in case a network is not suitable for adding
1045 it to our database, and True otherwise.
1046 """
d9b0a6ea
MT
1047 # Check input
1048 if isinstance(network, ipaddress.IPv6Network):
1049 pass
1050 elif isinstance(network, ipaddress.IPv4Network):
1051 pass
1052 else:
1053 raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
bd341642 1054
cb45f0d5 1055 # Ignore anything that isn't globally routable
bd341642 1056 if not network.is_global:
2ba6ed07 1057 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
1058 return False
1059
cb45f0d5
MT
1060 # Ignore anything that is unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
1061 elif network.is_unspecified:
1062 log.debug("Skipping unspecified network: %s" % network)
1063 return False
1064
c940dcf1
MT
1065 # IPv6
1066 if network.version == 6:
bd341642 1067 if network.prefixlen < 10:
2ba6ed07 1068 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
1069 return False
1070
c940dcf1
MT
1071 # IPv4
1072 elif network.version == 4:
1073 if network.prefixlen < 7:
1074 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
1075 return False
1076
bd341642
PM
1077 # In case we have made it here, the network is considered to
1078 # be suitable for libloc consumption...
1079 return True
1080
43fe570c
PM
def _check_parsed_asn(self, asn):
	"""
	Assistive function to filter Autonomous System Numbers not being suitable
	for adding to our database. Returns False in such cases, and True otherwise.
	"""
	# Accept the ASN as soon as it falls into any publicly routable range
	# (idiomatic chained comparison instead of "start <= asn and end >= asn")
	for start, end in VALID_ASN_RANGES:
		if start <= asn <= end:
			return True

	log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
	return False
1093
d4697973 1094 def _parse_block(self, block, source_key, countries):
6ffd06b5
MT
1095 # Get first line to find out what type of block this is
1096 line = block[0]
1097
6ffd06b5 1098 # aut-num
429a43d1 1099 if line.startswith("aut-num:"):
28b08385 1100 return self._parse_autnum_block(block, source_key)
6ffd06b5 1101
aadac4c5
PM
1102 # inetnum
1103 if line.startswith("inet6num:") or line.startswith("inetnum:"):
d4697973 1104 return self._parse_inetnum_block(block, source_key, countries)
aadac4c5 1105
6ffd06b5
MT
1106 # organisation
1107 elif line.startswith("organisation:"):
28b08385 1108 return self._parse_org_block(block, source_key)
6ffd06b5 1109
28b08385 1110 def _parse_autnum_block(self, block, source_key):
6ffd06b5
MT
1111 autnum = {}
1112 for line in block:
1113 # Split line
1114 key, val = split_line(line)
1115
1116 if key == "aut-num":
1117 m = re.match(r"^(AS|as)(\d+)", val)
1118 if m:
1119 autnum["asn"] = m.group(2)
1120
0365119d 1121 elif key == "org":
e7d612e5 1122 autnum[key] = val.upper()
6ffd06b5 1123
426e0bee
PM
1124 elif key == "descr":
1125 # Save the first description line as well...
1126 if not key in autnum:
1127 autnum[key] = val
1128
6ffd06b5 1129 # Skip empty objects
426e0bee 1130 if not autnum or not "asn" in autnum:
6ffd06b5
MT
1131 return
1132
426e0bee
PM
1133 # Insert a dummy organisation handle into our temporary organisations
1134 # table in case the AS does not have an organisation handle set, but
1135 # has a description (a quirk often observed in APNIC area), so we can
1136 # later display at least some string for this AS.
1137 if not "org" in autnum:
1138 if "descr" in autnum:
1139 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
1140
1141 self.db.execute("INSERT INTO _organizations(handle, name, source) \
1142 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
1143 autnum.get("org"), autnum.get("descr"), source_key,
1144 )
1145 else:
1146 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
1147 autnum.get("asn"))
1148 return
1149
6ffd06b5 1150 # Insert into database
28b08385
PM
1151 self.db.execute("INSERT INTO _autnums(number, organization, source) \
1152 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
0365119d 1153 organization = excluded.organization",
28b08385 1154 autnum.get("asn"), autnum.get("org"), source_key,
6ffd06b5
MT
1155 )
1156
def _parse_inetnum_block(self, block, source_key, countries):
	"""
	Parses one inetnum/inet6num block from a RIR database and inserts
	all networks derived from it into the temporary _rirdata table.

	Arguments:
		block      - list of "key: value" lines making up the block
		source_key - name of the RIR this data came from
		countries  - collection of known/valid country codes
	"""
	inetnum = {}
	for line in block:
		# Split line
		key, val = split_line(line)

		# Filter any inetnum records which are only referring to IP space
		# not managed by that specific RIR...
		if key == "netname":
			if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
				log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
				return

		if key == "inetnum":
			# An inetnum value is usually "<first address> - <last address>"
			start_address, delim, end_address = val.partition("-")

			# Strip any excess space
			start_address, end_address = start_address.rstrip(), end_address.strip()

			# Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
			if start_address and not (delim or end_address):
				try:
					start_address = ipaddress.ip_network(start_address, strict=False)
				except ValueError:
					start_address = start_address.split("/")
					ldigits = start_address[0].count(".")

					# How many octets do we need to add?
					# (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
					if ldigits == 1:
						start_address = start_address[0] + ".0.0/" + start_address[1]
					elif ldigits == 2:
						start_address = start_address[0] + ".0/" + start_address[1]
					else:
						log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
						return

					try:
						start_address = ipaddress.ip_network(start_address, strict=False)
					except ValueError:
						log.warning("Could not parse line in LACNIC DB format: %s" % line)
						return

				# Enumerate first and last IP address of this network
				end_address = start_address[-1]
				start_address = start_address[0]

			else:
				# Convert to IP address
				try:
					start_address = ipaddress.ip_address(start_address)
					end_address = ipaddress.ip_address(end_address)
				except ValueError:
					log.warning("Could not parse line: %s" % line)
					return

			# An arbitrary address range may need several CIDR networks to cover it
			inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))

		elif key == "inet6num":
			inetnum[key] = [ipaddress.ip_network(val, strict=False)]

		elif key == "country":
			cc = val.upper()

			# Ignore certain country codes
			if cc in IGNORED_COUNTRIES:
				log.debug("Ignoring country code '%s'" % cc)
				continue

			# Translate country codes
			try:
				cc = TRANSLATED_COUNTRIES[cc]
			except KeyError:
				pass

			# Do we know this country?
			if not cc in countries:
				log.warning("Skipping invalid country code '%s'" % cc)
				continue

			# A block may carry multiple country attributes; collect them all
			try:
				inetnum[key].append(cc)
			except KeyError:
				inetnum[key] = [cc]

		# Parse the geofeed attribute
		elif key == "geofeed":
			inetnum["geofeed"] = val

		# Parse geofeed when used as a remark
		elif key == "remarks":
			m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
			if m:
				inetnum["geofeed"] = m.group(1)

	# Skip empty objects
	if not inetnum:
		return

	# Iterate through all networks enumerated from above, check them for plausibility and insert
	# them into the database, if _check_parsed_network() succeeded
	for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
		if not self._check_parsed_network(single_network):
			continue

		# Fetch the countries or use a list with an empty country
		# NOTE(review): this rebinds the "countries" parameter (the set of
		# valid country codes); harmless because the parameter is not used
		# after this point, but worth renaming eventually
		countries = inetnum.get("country", [None])

		# Insert the network into the database but only use the first country code
		for cc in countries:
			self.db.execute("""
				INSERT INTO
					_rirdata
				(
					network,
					country,
					original_countries,
					source
				)
				VALUES
				(
					%s, %s, %s, %s
				)
				ON CONFLICT (network)
				DO UPDATE SET country = excluded.country
				""", "%s" % single_network, cc, [cc for cc in countries if cc], source_key,
			)

			# If there are more than one country, we will only use the first one
			break

		# Update any geofeed information
		geofeed = inetnum.get("geofeed", None)
		if geofeed:
			self._parse_geofeed(source_key, geofeed, single_network)
39bfa052 1292
def _parse_geofeed(self, source, url, single_network):
	"""
	Stores/updates a Geofeed URL for the given network.

	Arguments:
		source         - name of the RIR this data came from
		url            - the Geofeed URL as found in the RIR data
		single_network - the network the feed belongs to

	Only HTTPS URLs are accepted; anything else is silently dropped.
	"""
	# Parse the URL
	url = urllib.parse.urlparse(url)

	# Make sure that this is a HTTPS URL
	if not url.scheme == "https":
		# BUGFIX: the original referenced the undefined name "geofeed"
		# here, raising NameError for every non-HTTPS URL
		log.debug("Geofeed URL is not using HTTPS: %s" % url.geturl())
		return

	# Put the URL back together normalized
	url = url.geturl()

	# Store/update any geofeeds
	self.db.execute("""
		INSERT INTO
			network_geofeeds
		(
			network,
			url,
			source
		)
		VALUES
		(
			%s, %s, %s
		)
		ON CONFLICT (network) DO
		UPDATE SET url = excluded.url""",
		"%s" % single_network, url, source,
	)
1322
28b08385 1323 def _parse_org_block(self, block, source_key):
6ffd06b5
MT
1324 org = {}
1325 for line in block:
1326 # Split line
1327 key, val = split_line(line)
1328
e7d612e5
PM
1329 if key == "organisation":
1330 org[key] = val.upper()
1331 elif key == "org-name":
6ffd06b5
MT
1332 org[key] = val
1333
1334 # Skip empty objects
1335 if not org:
1336 return
1337
28b08385
PM
1338 self.db.execute("INSERT INTO _organizations(handle, name, source) \
1339 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
0365119d 1340 UPDATE SET name = excluded.name",
28b08385 1341 org.get("organisation"), org.get("org-name"), source_key,
6ffd06b5
MT
1342 )
1343
4b931f6c 1344 def _parse_line(self, line, source_key, validcountries=None):
429a43d1
MT
1345 # Skip version line
1346 if line.startswith("2"):
1347 return
6ffd06b5 1348
429a43d1
MT
1349 # Skip comments
1350 if line.startswith("#"):
1351 return
6ffd06b5 1352
429a43d1
MT
1353 try:
1354 registry, country_code, type, line = line.split("|", 3)
1355 except:
1356 log.warning("Could not parse line: %s" % line)
1357 return
6ffd06b5 1358
9a9b0698
MT
1359 # Skip ASN
1360 if type == "asn":
1361 return
1362
4b931f6c 1363 # Skip any unknown protocols
9a9b0698 1364 elif not type in ("ipv6", "ipv4"):
4b931f6c
MT
1365 log.warning("Unknown IP protocol '%s'" % type)
1366 return
1367
84b175e2
PM
1368 # Skip any lines that are for stats only or do not have a country
1369 # code at all (avoids log spam below)
1370 if not country_code or country_code == '*':
1371 return
1372
1373 # Skip objects with unknown country codes
1374 if validcountries and country_code not in validcountries:
1375 log.warning("Skipping line with bogus country '%s': %s" % \
1376 (country_code, line))
6ffd06b5
MT
1377 return
1378
429a43d1
MT
1379 try:
1380 address, prefix, date, status, organization = line.split("|")
1381 except ValueError:
1382 organization = None
1383
1384 # Try parsing the line without organization
1385 try:
1386 address, prefix, date, status = line.split("|")
1387 except ValueError:
1388 log.warning("Unhandled line format: %s" % line)
1389 return
1390
1391 # Skip anything that isn't properly assigned
1392 if not status in ("assigned", "allocated"):
1393 return
1394
1395 # Cast prefix into an integer
1396 try:
1397 prefix = int(prefix)
1398 except:
1399 log.warning("Invalid prefix: %s" % prefix)
7177031f 1400 return
429a43d1
MT
1401
1402 # Fix prefix length for IPv4
1403 if type == "ipv4":
1404 prefix = 32 - int(math.log(prefix, 2))
1405
1406 # Try to parse the address
1407 try:
1408 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
1409 except ValueError:
1410 log.warning("Invalid IP address: %s" % address)
1411 return
1412
bd341642
PM
1413 if not self._check_parsed_network(network):
1414 return
1415
4b931f6c
MT
1416 self.db.execute("""
1417 INSERT INTO
1418 networks
1419 (
1420 network,
1421 country,
1422 original_countries,
1423 source
1424 )
1425 VALUES
1426 (
1427 %s, %s, %s, %s
1428 )
1429 ON CONFLICT (network)
1430 DO UPDATE SET country = excluded.country
60f09cda 1431 """, "%s" % network, country_code, [country_code], source_key,
6ffd06b5
MT
1432 )
1433
async def handle_update_announcements(self, ns):
	"""
	Handler for the "update-announcements" command.

	Imports the global routing table from the configured server and
	then purges announcements we never want to keep. Only servers
	given as a filesystem path (a local Bird control socket) are
	imported here; other values currently result in the purge only.
	"""
	server = ns.server[0]

	with self.db.transaction():
		# A leading slash means a local Bird control socket
		if server.startswith("/"):
			await self._handle_update_announcements_from_bird(server)

		# Purge anything we never want here
		self.db.execute("""
			-- Delete default routes
			DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

			-- Delete anything that is not global unicast address space
			DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

			-- DELETE "current network" address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

			-- DELETE local loopback address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

			-- DELETE RFC 1918 address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

			-- DELETE test, benchmark and documentation address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

			-- DELETE CGNAT address space (RFC 6598)
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

			-- DELETE link local address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

			-- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
			DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

			-- DELETE multicast and reserved address space
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
			DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

			-- Delete networks that are too small to be in the global routing table
			DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
			DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

			-- Delete any non-public or reserved ASNs
			DELETE FROM announcements WHERE NOT (
				(autnum >= 1 AND autnum <= 23455)
			OR
				(autnum >= 23457 AND autnum <= 64495)
			OR
				(autnum >= 131072 AND autnum <= 4199999999)
			);

			-- Delete everything that we have not seen for 14 days
			DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
		""")
1497
async def _handle_update_announcements_from_bird(self, server):
	"""
	Reads the routing table from a Bird daemon listening on the given
	UNIX socket and upserts all announcements into the database.

	Routes without an origin AS ("aggregated networks") are resolved in
	a second pass by querying the AS path of each such route.
	"""
	# Pre-compile the regular expressions for faster searching.
	# Raw byte strings fix the invalid "\s"/"\[" escape warnings of the
	# previous non-raw literals; the pattern bytes are unchanged.
	route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")

	# Hoisted out of the inner loop below, where it used to be re-compiled
	# for every line of output
	as_path = re.compile(rb"\s+BGP\.as_path:.* (\d+) {\d+}$")

	log.info("Requesting routing table from Bird (%s)" % server)

	aggregated_networks = []

	# Send command to list all routes
	for line in self._bird_cmd(server, "show route"):
		m = route.match(line)
		if not m:
			# Skip empty lines
			if not line:
				pass

			# Ignore any header lines with the name of the routing table
			elif line.startswith(b"Table"):
				pass

			# Log anything else
			else:
				log.debug("Could not parse line: %s" % line.decode())

			continue

		# Fetch the extracted network and ASN
		network, autnum = m.groups()

		# Decode into strings
		if network:
			network = network.decode()
		if autnum:
			autnum = autnum.decode()

		# Collect all aggregated networks
		if not autnum:
			log.debug("%s is an aggregated network" % network)
			aggregated_networks.append(network)
			continue

		# Insert it into the database
		self.db.execute("INSERT INTO announcements(network, autnum) \
			VALUES(%s, %s) ON CONFLICT (network) DO \
			UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
			network, autnum,
		)

	# Process any aggregated networks
	for network in aggregated_networks:
		log.debug("Processing aggregated network %s" % network)

		# Run "show route all" for each network
		for line in self._bird_cmd(server, "show route %s all" % network):
			# Try finding the path
			m = as_path.match(line)
			if m:
				# Select the last AS number in the path
				autnum = m.group(1).decode()

				# Insert it into the database
				self.db.execute("INSERT INTO announcements(network, autnum) \
					VALUES(%s, %s) ON CONFLICT (network) DO \
					UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
					network, autnum,
				)

				# We don't need to process any more
				break
1567
22d8d199
MT
def _bird_cmd(self, socket_path, command):
	"""
	Sends a command to the Bird daemon listening on the given UNIX
	socket and yields its response line by line (as bytes, with any
	status-code prefix stripped).

	The generator terminates once Bird signals the end of its output
	(status code 0).
	"""
	# Connect to the socket
	s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

	# FIX: the socket was previously never closed; the context manager
	# releases the file descriptor when the generator finishes or is
	# discarded
	with s:
		s.connect(socket_path)

		# Allocate some buffer
		buffer = b""

		log.debug("Sending Bird command: %s" % command)

		# Send the command
		s.send(b"%s\n" % command.encode())

		while True:
			# Fill up the buffer
			buffer += s.recv(4096)

			while True:
				# Search for the next newline
				pos = buffer.find(b"\n")

				# If we cannot find one, we go back and read more data
				# NOTE(review): pos == 0 (a completely empty line) also
				# breaks here, which assumes Bird never sends empty lines
				# - confirm against the Bird control protocol
				if pos <= 0:
					break

				# Cut after the newline character
				pos += 1

				# Split the line we want and keep the rest in buffer
				line, buffer = buffer[:pos], buffer[pos:]

				# Try parsing any status lines: a four-digit code followed
				# by a space (32) or a dash (45, continuation)
				if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
					code, delim, line = int(line[:4]), line[4], line[5:]

					log.debug("Received response code %s from bird" % code)

					# End of output
					if code == 0:
						return

					# Ignore hello line
					elif code == 1:
						continue

				# Otherwise return the line
				yield line
83d61c46 1615
async def handle_update_geofeeds(self, ns):
	"""
	Handler for the "update-geofeeds" command.

	Synchronises the geofeeds table with all currently referenced
	Geofeed URLs (from RIR data and overrides), re-fetches any feeds
	that are due for an update, and drops data from stale feeds.
	"""
	# Sync geofeeds
	with self.db.transaction():
		# Delete all geofeeds which are no longer linked
		self.db.execute("""
			DELETE FROM
				geofeeds
			WHERE
				geofeeds.url NOT IN (
					SELECT
						network_geofeeds.url
					FROM
						network_geofeeds

					UNION

					SELECT
						geofeed_overrides.url
					FROM
						geofeed_overrides
				)
			""",
		)

		# Copy all geofeeds
		self.db.execute("""
			WITH all_geofeeds AS (
				SELECT
					network_geofeeds.url
				FROM
					network_geofeeds

				UNION

				SELECT
					geofeed_overrides.url
				FROM
					geofeed_overrides
			)
			INSERT INTO
				geofeeds
			(
				url
			)
			SELECT
				url
			FROM
				all_geofeeds
			ON CONFLICT (url)
			DO NOTHING
			""",
		)

	# Fetch all Geofeeds that require an update
	geofeeds = self.db.query("""
		SELECT
			id,
			url
		FROM
			geofeeds
		WHERE
			updated_at IS NULL
		OR
			updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
		ORDER BY
			id
		""")

	# Limit the number of concurrent fetches
	ratelimiter = asyncio.Semaphore(32)

	# Update all geofeeds
	async with asyncio.TaskGroup() as tasks:
		for geofeed in geofeeds:
			# The TaskGroup keeps its own reference to each task, so the
			# previously unused local binding has been dropped
			tasks.create_task(
				self._fetch_geofeed(ratelimiter, geofeed),
			)

	# Delete data from any feeds that did not update in the last two weeks
	with self.db.transaction():
		self.db.execute("""
			DELETE FROM
				geofeed_networks
			WHERE
				geofeed_networks.geofeed_id IN (
					SELECT
						geofeeds.id
					FROM
						geofeeds
					WHERE
						updated_at IS NULL
					OR
						updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
				)
			""")
1710
98d9079b
MT
async def _fetch_geofeed(self, ratelimiter, geofeed):
	"""
	Fetches a single Geofeed (CSV) and replaces its stored networks.

	On HTTP or connection errors, the status/error columns of the feed
	are updated instead; on HTTP 404 any previously imported networks
	are removed as well.

	FIX: the exception handlers referenced urllib.request.HTTPError and
	urllib.request.URLError although this file only imports urllib.error
	(the canonical home of both classes); that only worked through a
	transitive import elsewhere.
	"""
	async with ratelimiter:
		log.debug("Fetching Geofeed %s" % geofeed.url)

		with self.db.transaction():
			# Open the URL
			try:
				# Send the request
				f = await asyncio.to_thread(
					self.downloader.retrieve,

					# Fetch the feed by its URL
					geofeed.url,

					# Send some extra headers
					headers={
						"User-Agent" : "location/%s" % location.__version__,

						# We expect some plain text file in CSV format
						"Accept" : "text/csv, text/plain",
					},

					# Don't wait longer than 10 seconds for a response
					timeout=10,
				)

				# Remove any previous data
				self.db.execute("DELETE FROM geofeed_networks \
					WHERE geofeed_id = %s", geofeed.id)

				lineno = 0

				# Read the output line by line
				with self.db.pipeline():
					for line in f:
						lineno += 1

						try:
							line = line.decode()

						# Ignore any lines we cannot decode
						except UnicodeDecodeError:
							log.debug("Could not decode line %s in %s" \
								% (lineno, geofeed.url))
							continue

						# Strip any newline
						line = line.rstrip()

						# Skip empty lines
						if not line:
							continue

						# Skip comments
						elif line.startswith("#"):
							continue

						# Try to parse the line
						try:
							fields = line.split(",", 5)
						except ValueError:
							log.debug("Could not parse line: %s" % line)
							continue

						# Check if we have enough fields
						if len(fields) < 4:
							log.debug("Not enough fields in line: %s" % line)
							continue

						# Fetch all fields
						network, country, region, city, = fields[:4]

						# Try to parse the network
						try:
							network = ipaddress.ip_network(network, strict=False)
						except ValueError:
							log.debug("Could not parse network: %s" % network)
							continue

						# Strip any excess whitespace from country codes
						country = country.strip()

						# Make the country code uppercase
						country = country.upper()

						# Check the country code
						if not country:
							log.debug("Empty country code in Geofeed %s line %s" \
								% (geofeed.url, lineno))
							continue

						elif not location.country_code_is_valid(country):
							log.debug("Invalid country code in Geofeed %s:%s: %s" \
								% (geofeed.url, lineno, country))
							continue

						# Write this into the database
						self.db.execute("""
							INSERT INTO
								geofeed_networks (
									geofeed_id,
									network,
									country,
									region,
									city
								)
							VALUES (%s, %s, %s, %s, %s)""",
							geofeed.id,
							"%s" % network,
							country,
							region,
							city,
						)

			# Catch any HTTP errors
			except urllib.error.HTTPError as e:
				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", e.code, "%s" % e, geofeed.id)

				# Remove any previous data when the feed has been deleted
				if e.code == 404:
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

			# Catch any other errors and connection timeouts
			except (http.client.InvalidURL, urllib.error.URLError, TimeoutError) as e:
				log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))

				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", 599, "%s" % e, geofeed.id)

			# Mark the geofeed as updated
			else:
				self.db.execute("""
					UPDATE
						geofeeds
					SET
						updated_at = CURRENT_TIMESTAMP,
						status = NULL,
						error = NULL
					WHERE
						id = %s""",
					geofeed.id,
				)
0fcdd689 1855
async def handle_update_overrides(self, ns):
	"""
	Handler for the "update-overrides" command.

	Replaces the contents of the override tables (network, autnum and
	geofeed overrides) with the blocks read from the files given on
	the command line. Invalid entries are logged and skipped.
	"""
	with self.db.transaction():
		# Drop any previous content
		self.db.execute("TRUNCATE TABLE autnum_overrides")
		self.db.execute("TRUNCATE TABLE geofeed_overrides")
		self.db.execute("TRUNCATE TABLE network_overrides")

		for file in ns.files:
			log.info("Reading %s..." % file)

			with open(file, "rb") as f:
				for type, block in read_blocks(f):
					if type == "net":
						network = block.get("net")
						# Try to parse and normalise the network
						try:
							network = ipaddress.ip_network(network, strict=False)
						except ValueError as e:
							log.warning("Invalid IP network: %s: %s" % (network, e))
							continue

						# Prevent that we overwrite all networks
						if network.prefixlen == 0:
							log.warning("Skipping %s: You cannot overwrite default" % network)
							continue

						self.db.execute("""
							INSERT INTO
								network_overrides
							(
								network,
								country,
								is_anonymous_proxy,
								is_satellite_provider,
								is_anycast,
								is_drop
							)
							VALUES
							(
								%s, %s, %s, %s, %s, %s
							)
							ON CONFLICT (network) DO NOTHING
							""",
							"%s" % network,
							block.get("country"),
							self._parse_bool(block, "is-anonymous-proxy"),
							self._parse_bool(block, "is-satellite-provider"),
							self._parse_bool(block, "is-anycast"),
							self._parse_bool(block, "drop"),
						)

					elif type == "aut-num":
						autnum = block.get("aut-num")

						# Check if AS number begins with "AS"
						if not autnum.startswith("AS"):
							log.warning("Invalid AS number: %s" % autnum)
							continue

						# Strip "AS"
						autnum = autnum[2:]

						self.db.execute("""
							INSERT INTO
								autnum_overrides
							(
								number,
								name,
								country,
								is_anonymous_proxy,
								is_satellite_provider,
								is_anycast,
								is_drop
							)
							VALUES
							(
								%s, %s, %s, %s, %s, %s, %s
							)
							ON CONFLICT (number) DO NOTHING
							""",
							autnum,
							block.get("name"),
							block.get("country"),
							self._parse_bool(block, "is-anonymous-proxy"),
							self._parse_bool(block, "is-satellite-provider"),
							self._parse_bool(block, "is-anycast"),
							self._parse_bool(block, "drop"),
						)

					# Geofeeds
					elif type == "geofeed":
						url = block.get("geofeed")

						# Parse the URL
						try:
							url = urllib.parse.urlparse(url)
						except ValueError as e:
							log.warning("Skipping invalid URL %s: %s" % (url, e))
							continue

						# Make sure that this is a HTTPS URL
						if not url.scheme == "https":
							log.warning("Skipping Geofeed URL that is not using HTTPS: %s" \
								% url.geturl())
							continue

						# Normalize the URL and convert it back
						url = url.geturl()

						self.db.execute("""
							INSERT INTO
								geofeed_overrides
							(
								url
							)
							VALUES
							(
								%s
							)
							ON CONFLICT (url) DO NOTHING
							""", url,
						)

					else:
						log.warning("Unsupported type: %s" % type)
d7fc3057 1981
async def handle_update_feeds(self, ns):
	"""
	Update any third-party feeds
	"""
	# Registry of all feeds we know how to import:
	# (source name, import callback, URL, optional extra arguments)
	feeds = (
		# AWS IP Ranges
		("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),

		# Spamhaus DROP
		("SPAMHAUS-DROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
		("SPAMHAUS-EDROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
		("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),

		# Spamhaus ASNDROP
		("SPAMHAUS-ASNDROP", self._import_spamhaus_asndrop, "https://www.spamhaus.org/drop/asndrop.json"),
	)

	# Drop any data from feeds that we don't support (any more)
	with self.db.transaction():
		# Fetch the names of all feeds we support
		sources = [name for name, *rest in feeds]

		self.db.execute("DELETE FROM autnum_feeds WHERE NOT source = ANY(%s)", sources)
		self.db.execute("DELETE FROM network_feeds WHERE NOT source = ANY(%s)", sources)

	# Collect the names of all feeds that failed to import
	failed = []

	# Walk through all feeds
	for name, callback, url, *args in feeds:
		# Skip any feeds that were not requested on the command line
		if ns.feeds and not name in ns.feeds:
			continue

		try:
			await self._process_feed(name, callback, url, *args)

		# Log an error but continue if an exception occurs
		except Exception as e:
			log.error("Error processing feed '%s': %s" % (name, e))
			failed.append(name)

	# Return a non-zero exit code if anything went wrong
	return 1 if failed else 0
dcef2ba4 2025
async def _process_feed(self, name, callback, url, *args):
	"""
	Downloads a single third-party feed and hands it to its import
	callback. Any rows previously imported from the same source are
	removed first, so a feed import is atomic within one transaction.
	"""
	# Download the feed before touching the database
	fileobj = self.downloader.retrieve(url)

	with self.db.transaction():
		# Throw away whatever this source delivered last time
		self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
		self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)

		# Let the callback import the feed, batching all writes
		with self.db.pipeline():
			return await callback(name, fileobj, *args)
a775372b 2041
98d9079b 2042 async def _import_aws_ip_ranges(self, name, f):
a775372b 2043 # Parse the feed
c30e421e 2044 feed = json.load(f)
68fdeb3b 2045
99e4a4eb 2046 # Set up a dictionary for mapping a region name to a country. Unfortunately,
dcef2ba4
PM
2047 # there seems to be no machine-readable version available of this other than
2048 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
2049 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
2050 # was helpful here as well.
2051 aws_region_country_map = {
99e4a4eb
MT
2052 # Africa
2053 "af-south-1" : "ZA",
2054
2055 # Asia
2056 "il-central-1" : "IL", # Tel Aviv
2057
2058 # Asia/Pacific
2059 "ap-northeast-1" : "JP",
2060 "ap-northeast-2" : "KR",
2061 "ap-northeast-3" : "JP",
2062 "ap-east-1" : "HK",
2063 "ap-south-1" : "IN",
2064 "ap-south-2" : "IN",
2065 "ap-southeast-1" : "SG",
2066 "ap-southeast-2" : "AU",
2067 "ap-southeast-3" : "MY",
2068 "ap-southeast-4" : "AU",
2069 "ap-southeast-5" : "NZ", # Auckland, NZ
2070 "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
2071
2072 # Canada
2073 "ca-central-1" : "CA",
2074 "ca-west-1" : "CA",
2075
2076 # Europe
2077 "eu-central-1" : "DE",
2078 "eu-central-2" : "CH",
2079 "eu-north-1" : "SE",
2080 "eu-west-1" : "IE",
2081 "eu-west-2" : "GB",
2082 "eu-west-3" : "FR",
2083 "eu-south-1" : "IT",
2084 "eu-south-2" : "ES",
2085
2086 # Middle East
2087 "me-central-1" : "AE",
2088 "me-south-1" : "BH",
2089
2090 # South America
2091 "sa-east-1" : "BR",
2092
2093 # Undocumented, likely located in Berlin rather than Frankfurt
2094 "eusc-de-east-1" : "DE",
2095 }
dcef2ba4 2096
c30e421e
MT
2097 # Collect a list of all networks
2098 prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])
dcef2ba4 2099
c30e421e
MT
2100 for prefix in prefixes:
2101 # Fetch network
2102 network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")
2103
2104 # Parse the network
a775372b 2105 try:
c30e421e
MT
2106 network = ipaddress.ip_network(network)
2107 except ValuleError as e:
2108 log.warning("%s: Unable to parse prefix %s" % (name, network))
a775372b 2109 continue
dcef2ba4 2110
a775372b
MT
2111 # Sanitize parsed networks...
2112 if not self._check_parsed_network(network):
2113 continue
dcef2ba4 2114
c30e421e
MT
2115 # Fetch the region
2116 region = prefix.get("region")
2117
2118 # Set some defaults
a775372b
MT
2119 cc = None
2120 is_anycast = False
2121
c30e421e
MT
2122 # Fetch the CC from the dictionary
2123 try:
a775372b 2124 cc = aws_region_country_map[region]
dcef2ba4 2125
c30e421e
MT
2126 # If we couldn't find anything, let's try something else...
2127 except KeyError as e:
2128 # Find anycast networks
2129 if region == "GLOBAL":
2130 is_anycast = True
2131
2132 # Everything that starts with us- is probably in the United States
2133 elif region.startswith("us-"):
2134 cc = "US"
2135
2136 # Everything that starts with cn- is probably China
2137 elif region.startswith("cn-"):
2138 cc = "CN"
2139
2140 # Log a warning for anything else
2141 else:
2142 log.warning("%s: Could not determine country code for AWS region %s" \
2143 % (name, region))
2144 continue
dcef2ba4 2145
c30e421e 2146 # Write to database
a775372b
MT
2147 self.db.execute("""
2148 INSERT INTO
2149 network_feeds
2150 (
2151 network,
2152 source,
2153 country,
2154 is_anycast
dcef2ba4 2155 )
a775372b
MT
2156 VALUES
2157 (
2158 %s, %s, %s, %s
2159 )
2160 ON CONFLICT (network, source) DO NOTHING
c30e421e 2161 """, "%s" % network, name, cc, is_anycast,
a775372b 2162 )
dcef2ba4 2163
98d9079b 2164 async def _import_spamhaus_drop(self, name, f):
377cead4
MT
2165 """
2166 Import Spamhaus DROP IP feeds
2167 """
2168 # Count all lines
2169 lines = 0
69b3d894 2170
377cead4
MT
2171 # Walk through all lines
2172 for line in f:
2173 # Decode line
2174 line = line.decode("utf-8")
69b3d894 2175
377cead4
MT
2176 # Strip off any comments
2177 line, _, comment = line.partition(";")
69b3d894 2178
377cead4
MT
2179 # Ignore empty lines
2180 if not line:
2181 continue
85e44d9a 2182
377cead4
MT
2183 # Strip any excess whitespace
2184 line = line.strip()
69b3d894 2185
377cead4
MT
2186 # Increment line counter
2187 lines += 1
5acac2a4 2188
377cead4
MT
2189 # Parse the network
2190 try:
2191 network = ipaddress.ip_network(line)
2192 except ValueError as e:
2193 log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
2194 continue
69b3d894 2195
377cead4
MT
2196 # Check network
2197 if not self._check_parsed_network(network):
2198 log.warning("%s: Skipping bogus network: %s" % (name, network))
2199 continue
69b3d894 2200
377cead4
MT
2201 # Insert into the database
2202 self.db.execute("""
2203 INSERT INTO
2204 network_feeds
2205 (
2206 network,
2207 source,
2208 is_drop
2209 )
2210 VALUES
2211 (
2212 %s, %s, %s
2213 )""", "%s" % network, name, True,
2214 )
69b3d894 2215
377cead4
MT
2216 # Raise an exception if we could not import anything
2217 if not lines:
2218 raise RuntimeError("Received bogus feed %s with no data" % name)
69b3d894 2219
98d9079b 2220 async def _import_spamhaus_asndrop(self, name, f):
5dcda5c8
MT
2221 """
2222 Import Spamhaus ASNDROP feed
2223 """
2224 for line in f:
2225 # Decode the line
2226 line = line.decode("utf-8")
5acac2a4 2227
5dcda5c8
MT
2228 # Parse JSON
2229 try:
2230 line = json.loads(line)
2231 except json.JSONDecodeError as e:
2232 log.warning("%s: Unable to parse JSON object %s: %s" % (name, line, e))
2233 continue
69b3d894 2234
5dcda5c8
MT
2235 # Fetch type
2236 type = line.get("type")
69b3d894 2237
5dcda5c8
MT
2238 # Skip any metadata
2239 if type == "metadata":
2240 continue
69b3d894 2241
5dcda5c8
MT
2242 # Fetch ASN
2243 asn = line.get("asn")
69b3d894 2244
5dcda5c8
MT
2245 # Skip any lines without an ASN
2246 if not asn:
2247 continue
69b3d894 2248
5dcda5c8
MT
2249 # Filter invalid ASNs
2250 if not self._check_parsed_asn(asn):
2251 log.warning("%s: Skipping bogus ASN %s" % (name, asn))
2252 continue
69b3d894 2253
5dcda5c8
MT
2254 # Write to database
2255 self.db.execute("""
2256 INSERT INTO
2257 autnum_feeds
2258 (
2259 number,
2260 source,
2261 is_drop
2262 )
2263 VALUES
2264 (
2265 %s, %s, %s
2266 )""", "%s" % asn, name, True,
2267 )
69b3d894 2268
28d29b7c
MT
2269 @staticmethod
2270 def _parse_bool(block, key):
2271 val = block.get(key)
2272
2273 # There is no point to proceed when we got None
2274 if val is None:
2275 return
2276
2277 # Convert to lowercase
2278 val = val.lower()
2279
2280 # True
2281 if val in ("yes", "1"):
2282 return True
2283
2284 # False
2285 if val in ("no", "0"):
2286 return False
2287
2288 # Default to None
2289 return None
2290
98d9079b 2291 async def handle_import_countries(self, ns):
8084b33a
MT
2292 with self.db.transaction():
2293 # Drop all data that we have
2294 self.db.execute("TRUNCATE TABLE countries")
2295
2296 for file in ns.file:
2297 for line in file:
2298 line = line.rstrip()
2299
2300 # Ignore any comments
2301 if line.startswith("#"):
2302 continue
2303
2304 try:
2305 country_code, continent_code, name = line.split(maxsplit=2)
2306 except:
2307 log.warning("Could not parse line: %s" % line)
2308 continue
2309
2310 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
2311 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
2312
6ffd06b5
MT
2313
def split_line(line):
	"""
		Splits a "key: value" line and returns both parts with any
		surrounding whitespace removed.
	"""
	key, _, value = line.partition(":")

	return key.strip(), value.strip()
78ff0cf2 2322
8c3529c7
MT
def read_blocks(f):
	"""
		Yields (type, data) tuples for every block in the file.

		The type is taken from the key of a block's first line; data maps
		each key to its stripped value.
	"""
	for block in iterate_over_blocks(f):
		kind = None
		data = {}

		for index, line in enumerate(block):
			key, value = line.split(":", 1)

			# The first key determines the type of the whole block
			if not index:
				kind = key

			# Store value
			data[key] = value.strip()

		yield kind, data
2339
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
	"""
		Reads a byte-oriented file and yields lists of decoded lines, grouped
		into blocks that are separated by empty lines.

		Fully-commented lines (starting with '#' or '%') are skipped and
		trailing '#' comments are stripped off.
	"""
	block = []

	for line in f:
		# Drop lines that are entirely comments
		if line.startswith(b"#") or line.startswith(b"%"):
			continue

		# Decode using the first charset that succeeds
		for charset in charsets:
			try:
				line = line.decode(charset)
			except UnicodeDecodeError:
				continue
			else:
				break

		# Cut off any trailing comment
		line, hash, comment = line.partition("#")

		# Drop trailing whitespace
		line = line.rstrip()

		# A line that was nothing but a comment can be skipped entirely
		if comment and not line:
			continue

		# Collect content lines into the current block
		if line:
			block.append(line)

		# An empty line terminates the current block
		elif block:
			yield block
			block = []

	# Flush whatever is left at EOF
	if block:
		yield block
2382
def iterate_over_lines(f):
	"""
		Decodes every byte line of the file and yields it without any
		trailing whitespace.
	"""
	for raw in f:
		yield raw.decode().rstrip()
2390
async def main():
	"""
		Entry point: builds the command line interface and runs it.
	"""
	cli = CLI()

	await cli.run()

asyncio.run(main())