]> git.ipfire.org Git - location/libloc.git/blame - src/scripts/location-importer.in
importer: Refactor parsing Spamhaus DROP feeds
[location/libloc.git] / src / scripts / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
5c7cfeb2 6# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
0fcdd689 21import concurrent.futures
f4f3d8ad 22import csv
a6fedd9f 23import http.client
6ffd06b5 24import ipaddress
dcef2ba4 25import json
78ff0cf2 26import logging
6ffd06b5
MT
27import math
28import re
22d8d199 29import socket
78ff0cf2 30import sys
3ce28dea 31import urllib.error
78ff0cf2
MT
32
33# Load our location module
34import location
29c6fa22 35import location.database
3192b66c 36import location.importer
78ff0cf2
MT
37from location.i18n import _
38
39# Initialise logging
40log = logging.getLogger("location.importer")
41log.propagate = 1
42
43fe570c
PM
43# Define constants
44VALID_ASN_RANGES = (
45 (1, 23455),
46 (23457, 64495),
47 (131072, 4199999999),
48)
49
18a72eac
MT
50# Configure the CSV parser for ARIN
51csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
43fe570c 52
78ff0cf2
MT
53class CLI(object):
54 def parse_cli(self):
55 parser = argparse.ArgumentParser(
56 description=_("Location Importer Command Line Interface"),
57 )
6ffd06b5 58 subparsers = parser.add_subparsers()
78ff0cf2
MT
59
60 # Global configuration flags
61 parser.add_argument("--debug", action="store_true",
62 help=_("Enable debug output"))
bc1f5f53
MT
63 parser.add_argument("--quiet", action="store_true",
64 help=_("Enable quiet mode"))
78ff0cf2
MT
65
66 # version
67 parser.add_argument("--version", action="version",
68 version="%(prog)s @VERSION@")
69
29c6fa22
MT
70 # Database
71 parser.add_argument("--database-host", required=True,
72 help=_("Database Hostname"), metavar=_("HOST"))
73 parser.add_argument("--database-name", required=True,
74 help=_("Database Name"), metavar=_("NAME"))
75 parser.add_argument("--database-username", required=True,
76 help=_("Database Username"), metavar=_("USERNAME"))
77 parser.add_argument("--database-password", required=True,
78 help=_("Database Password"), metavar=_("PASSWORD"))
79
0983f3dd
MT
80 # Write Database
81 write = subparsers.add_parser("write", help=_("Write database to file"))
82 write.set_defaults(func=self.handle_write)
83 write.add_argument("file", nargs=1, help=_("Database File"))
84 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
1164d876 85 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
0983f3dd
MT
86 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
87 write.add_argument("--description", nargs="?", help=_("Sets a description"))
88 write.add_argument("--license", nargs="?", help=_("Sets the license"))
b904896a 89 write.add_argument("--version", type=int, help=_("Database Format Version"))
0983f3dd 90
6ffd06b5
MT
91 # Update WHOIS
92 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
93 update_whois.set_defaults(func=self.handle_update_whois)
94
83d61c46
MT
95 # Update announcements
96 update_announcements = subparsers.add_parser("update-announcements",
97 help=_("Update BGP Annoucements"))
98 update_announcements.set_defaults(func=self.handle_update_announcements)
99 update_announcements.add_argument("server", nargs=1,
100 help=_("Route Server to connect to"), metavar=_("SERVER"))
101
0fcdd689
MT
102 # Update geofeeds
103 update_geofeeds = subparsers.add_parser("update-geofeeds",
104 help=_("Update Geofeeds"))
105 update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
106
93aad7f7
MT
107 # Update feeds
108 update_feeds = subparsers.add_parser("update-feeds",
109 help=_("Update Feeds"))
f7f8e714
MT
110 update_feeds.add_argument("feeds", nargs="*",
111 help=_("Only update these feeds"))
93aad7f7
MT
112 update_feeds.set_defaults(func=self.handle_update_feeds)
113
d7fc3057
MT
114 # Update overrides
115 update_overrides = subparsers.add_parser("update-overrides",
116 help=_("Update overrides"),
117 )
118 update_overrides.add_argument(
119 "files", nargs="+", help=_("Files to import"),
120 )
121 update_overrides.set_defaults(func=self.handle_update_overrides)
122
8084b33a
MT
123 # Import countries
124 import_countries = subparsers.add_parser("import-countries",
125 help=_("Import countries"),
126 )
127 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
128 help=_("File to import"))
129 import_countries.set_defaults(func=self.handle_import_countries)
130
78ff0cf2
MT
131 args = parser.parse_args()
132
bc1f5f53 133 # Configure logging
78ff0cf2 134 if args.debug:
f9de5e61 135 location.logger.set_level(logging.DEBUG)
bc1f5f53
MT
136 elif args.quiet:
137 location.logger.set_level(logging.WARNING)
78ff0cf2 138
6ffd06b5
MT
139 # Print usage if no action was given
140 if not "func" in args:
141 parser.print_usage()
142 sys.exit(2)
143
78ff0cf2
MT
144 return args
145
146 def run(self):
147 # Parse command line arguments
148 args = self.parse_cli()
149
29c6fa22 150 # Initialise database
6ffd06b5 151 self.db = self._setup_database(args)
29c6fa22 152
78ff0cf2 153 # Call function
6ffd06b5 154 ret = args.func(args)
78ff0cf2
MT
155
156 # Return with exit code
157 if ret:
158 sys.exit(ret)
159
160 # Otherwise just exit
161 sys.exit(0)
162
29c6fa22
MT
	def _setup_database(self, ns):
		"""
		Initialise the database

		Connects to PostgreSQL using the credentials from the parsed
		command line namespace and creates/migrates the schema
		(idempotent: everything uses IF NOT EXISTS / ADD COLUMN IF NOT
		EXISTS). Returns the open connection.
		"""
		# Connect to database
		db = location.database.Connection(
			host=ns.database_host, database=ns.database_name,
			user=ns.database_username, password=ns.database_password,
		)

		# Create the schema inside one transaction so a failure leaves
		# the database untouched
		with db.transaction():
			db.execute("""
				-- announcements
				CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
					first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
					last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
				CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
				CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
				CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

				-- autnums
				CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
				ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

				-- countries
				CREATE TABLE IF NOT EXISTS countries(
					country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
				CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

				-- networks
				CREATE TABLE IF NOT EXISTS networks(network inet, country text);
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
				ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
				CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
				CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
				CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

				-- geofeeds
				CREATE TABLE IF NOT EXISTS geofeeds(
					id serial primary key,
					url text,
					status integer default null,
					updated_at timestamp without time zone default null
				);
				ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
				CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
					ON geofeeds(url);
				CREATE TABLE IF NOT EXISTS geofeed_networks(
					geofeed_id integer references geofeeds(id) on delete cascade,
					network inet,
					country text,
					region text,
					city text
				);
				CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
					ON geofeed_networks(geofeed_id);
				CREATE INDEX IF NOT EXISTS geofeed_networks_search
					ON geofeed_networks USING GIST(network inet_ops);
				CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
				CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
					ON network_geofeeds(network);
				CREATE INDEX IF NOT EXISTS network_geofeeds_search
					ON network_geofeeds USING GIST(network inet_ops);
				CREATE INDEX IF NOT EXISTS network_geofeeds_url
					ON network_geofeeds(url);

				-- feeds
				CREATE TABLE IF NOT EXISTS autnum_feeds(
					number bigint NOT NULL,
					source text NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
					ON autnum_feeds(number, source);

				CREATE TABLE IF NOT EXISTS network_feeds(
					network inet NOT NULL,
					source text NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean,
					is_drop boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
					ON network_feeds(network, source);
				CREATE INDEX IF NOT EXISTS network_feeds_search
					ON network_feeds USING GIST(network inet_ops);

				-- overrides
				CREATE TABLE IF NOT EXISTS autnum_overrides(
					number bigint NOT NULL,
					name text,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
					ON autnum_overrides(number);
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
				ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;

				CREATE TABLE IF NOT EXISTS network_overrides(
					network inet NOT NULL,
					country text,
					is_anonymous_proxy boolean,
					is_satellite_provider boolean,
					is_anycast boolean
				);
				CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
					ON network_overrides(network);
				CREATE INDEX IF NOT EXISTS network_overrides_search
					ON network_overrides USING GIST(network inet_ops);
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
				ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
			""")

		return db
288
e61a9352
MT
289 def fetch_countries(self):
290 """
291 Returns a list of all countries on the list
292 """
293 # Fetch all valid country codes to check parsed networks aganist...
294 countries = self.db.query("SELECT country_code FROM countries ORDER BY country_code")
295
296 return [country.country_code for country in countries]
297
0983f3dd
MT
	def handle_write(self, ns):
		"""
		Compiles a database in libloc format out of what is in the database

		Reads ASes, networks (merged from announcements, RIR data, feeds,
		geofeeds and overrides) and countries from PostgreSQL and writes
		them to each output file given in ns.file, optionally signed with
		ns.signing_key / ns.backup_signing_key.
		"""
		# Allocate a writer
		writer = location.Writer(ns.signing_key, ns.backup_signing_key)

		# Set all metadata
		if ns.vendor:
			writer.vendor = ns.vendor

		if ns.description:
			writer.description = ns.description

		if ns.license:
			writer.license = ns.license

		# Add all Autonomous Systems
		log.info("Writing Autonomous Systems...")

		# Select all ASes with a name; a name from autnum_overrides wins
		# over the imported one
		rows = self.db.query("""
			SELECT
				autnums.number AS number,
				COALESCE(
					overrides.name,
					autnums.name
				) AS name
			FROM
				autnums
			LEFT JOIN
				autnum_overrides overrides ON autnums.number = overrides.number
			ORDER BY
				autnums.number
			""")

		for row in rows:
			# Skip AS without names
			if not row.name:
				continue

			a = writer.add_as(row.number)
			a.name = row.name

		# Add all networks
		log.info("Writing networks...")

		# Select all known networks.
		# For every attribute the precedence is:
		#   network_overrides > autnum_overrides > network_feeds >
		#   autnum_feeds > (geofeeds for country) > RIR data,
		# with the longest matching prefix winning within each source.
		rows = self.db.query("""
			WITH known_networks AS (
				SELECT network FROM announcements
				UNION
				SELECT network FROM networks
				UNION
				SELECT network FROM network_feeds
				UNION
				SELECT network FROM network_overrides
				UNION
				SELECT network FROM geofeed_networks
			),

			ordered_networks AS (
				SELECT
					known_networks.network AS network,
					announcements.autnum AS autnum,
					networks.country AS country,

					-- Must be part of returned values for ORDER BY clause
					masklen(announcements.network) AS sort_a,
					masklen(networks.network) AS sort_b
				FROM
					known_networks
				LEFT JOIN
					announcements ON known_networks.network <<= announcements.network
				LEFT JOIN
					networks ON known_networks.network <<= networks.network
				ORDER BY
					known_networks.network,
					sort_a DESC,
					sort_b DESC
			)

			-- Return a list of those networks enriched with all
			-- other information that we store in the database
			SELECT
				DISTINCT ON (network)
				network,
				autnum,

				-- Country
				COALESCE(
					(
						SELECT
							country
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							country
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					(
						SELECT
							country
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							country
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							geofeed_networks.country AS country
						FROM
							network_geofeeds

						-- Join the data from the geofeeds
						LEFT JOIN
							geofeeds ON network_geofeeds.url = geofeeds.url
						LEFT JOIN
							geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id

						-- Check whether we have a geofeed for this network
						WHERE
							networks.network <<= network_geofeeds.network
						AND
							networks.network <<= geofeed_networks.network

						-- Filter for the best result
						ORDER BY
							masklen(geofeed_networks.network) DESC
						LIMIT 1
					),
					networks.country
				) AS country,

				-- Flags
				COALESCE(
					(
						SELECT
							is_anonymous_proxy
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anonymous_proxy
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anonymous_proxy
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_anonymous_proxy
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_anonymous_proxy,
				COALESCE(
					(
						SELECT
							is_satellite_provider
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_satellite_provider
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_satellite_provider
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_satellite_provider
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_satellite_provider,
				COALESCE(
					(
						SELECT
							is_anycast
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anycast
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_anycast
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_anycast
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_anycast,
				COALESCE(
					(
						SELECT
							is_drop
						FROM
							network_overrides overrides
						WHERE
							networks.network <<= overrides.network
						ORDER BY
							masklen(overrides.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_drop
						FROM
							network_feeds feeds
						WHERE
							networks.network <<= feeds.network
						ORDER BY
							masklen(feeds.network) DESC
						LIMIT 1
					),
					(
						SELECT
							is_drop
						FROM
							autnum_feeds feeds
						WHERE
							networks.autnum = feeds.number
						ORDER BY
							source
						LIMIT 1
					),
					(
						SELECT
							is_drop
						FROM
							autnum_overrides overrides
						WHERE
							networks.autnum = overrides.number
					),
					FALSE
				) AS is_drop
			FROM
				ordered_networks networks
			""")

		for row in rows:
			network = writer.add_network(row.network)

			# Save country
			if row.country:
				network.country_code = row.country

			# Save ASN
			if row.autnum:
				network.asn = row.autnum

			# Set flags
			if row.is_anonymous_proxy:
				network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

			if row.is_satellite_provider:
				network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

			if row.is_anycast:
				network.set_flag(location.NETWORK_FLAG_ANYCAST)

			if row.is_drop:
				network.set_flag(location.NETWORK_FLAG_DROP)

		# Add all countries
		log.info("Writing countries...")
		rows = self.db.query("SELECT * FROM countries ORDER BY country_code")

		for row in rows:
			c = writer.add_country(row.country_code)
			c.continent_code = row.continent_code
			c.name = row.name

		# Write everything to file
		log.info("Writing database to file...")
		for file in ns.file:
			writer.write(file)
674
6ffd06b5
MT
	def handle_update_whois(self, ns):
		"""
		Downloads and imports WHOIS and "extended" delegation data from all
		configured RIR sources into the autnums/networks tables.

		Each source is processed in its own transaction (with temporary
		staging tables dropped on commit); a download failure for one
		source is logged and the remaining sources are still processed.
		Returns 1 if any source failed, 0 otherwise.
		"""
		downloader = location.importer.Downloader()

		# Did we run successfully?
		error = False

		# Fetch all valid country codes to check parsed networks against
		validcountries = self.fetch_countries()

		# Iterate over all potential sources
		for source in sorted(location.importer.SOURCES):
			with self.db.transaction():
				# Create some temporary tables to store parsed data
				self.db.execute("""
					CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
						organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
					CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

					CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
						name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
					CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

					CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
						original_countries text[] NOT NULL, source text NOT NULL)
						ON COMMIT DROP;
					CREATE INDEX _rirdata_search ON _rirdata
						USING BTREE(family(network), masklen(network));
					CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
				""")

				# Remove all previously imported content
				self.db.execute("DELETE FROM autnums WHERE source = %s", source)
				self.db.execute("DELETE FROM networks WHERE source = %s", source)

				try:
					# Fetch WHOIS sources
					for url in location.importer.WHOIS_SOURCES.get(source, []):
						for block in downloader.request_blocks(url):
							self._parse_block(block, source, validcountries)

					# Fetch extended sources
					for url in location.importer.EXTENDED_SOURCES.get(source, []):
						for line in downloader.request_lines(url):
							self._parse_line(line, source, validcountries)
				except urllib.error.URLError as e:
					log.error("Could not retrieve data from %s: %s" % (source, e))
					error = True

					# Continue with the next source
					continue

				# Process all parsed networks from every RIR we happen to have access to,
				# insert the largest network chunks into the networks table immediately...
				families = self.db.query("""
					SELECT DISTINCT
						family(network) AS family
					FROM
						_rirdata
					ORDER BY
						family(network)
					""",
				)

				for family in (row.family for row in families):
					# Fetch the smallest mask length in our data set
					smallest = self.db.get("""
						SELECT
							MIN(
								masklen(network)
							) AS prefix
						FROM
							_rirdata
						WHERE
							family(network) = %s
						""", family,
					)

					# Copy all networks with the smallest prefix for this family
					self.db.execute("""
						INSERT INTO
							networks
						(
							network,
							country,
							original_countries,
							source
						)
						SELECT
							network,
							country,
							original_countries,
							source
						FROM
							_rirdata
						WHERE
							masklen(network) = %s
						AND
							family(network) = %s
						ON CONFLICT DO
							NOTHING""",
						smallest.prefix,
						family,
					)

					# ... determine any other prefixes for this network family, ...
					prefixes = self.db.query("""
						SELECT
							DISTINCT masklen(network) AS prefix
						FROM
							_rirdata
						WHERE
							family(network) = %s
						ORDER BY
							masklen(network) ASC
						OFFSET 1
						""", family,
					)

					# ... and insert networks with this prefix in case they provide additional
					# information (i. e. subnet of a larger chunk with a different country)
					for prefix in (row.prefix for row in prefixes):
						self.db.execute("""
							WITH candidates AS (
								SELECT
									_rirdata.network,
									_rirdata.country,
									_rirdata.original_countries,
									_rirdata.source
								FROM
									_rirdata
								WHERE
									family(_rirdata.network) = %s
								AND
									masklen(_rirdata.network) = %s
							),
							filtered AS (
								SELECT
									DISTINCT ON (c.network)
									c.network,
									c.country,
									c.original_countries,
									c.source,
									masklen(networks.network),
									networks.country AS parent_country
								FROM
									candidates c
								LEFT JOIN
									networks
								ON
									c.network << networks.network
								ORDER BY
									c.network,
									masklen(networks.network) DESC NULLS LAST
							)
							INSERT INTO
								networks(network, country, original_countries, source)
							SELECT
								network,
								country,
								original_countries,
								source
							FROM
								filtered
							WHERE
								parent_country IS NULL
							OR
								country <> parent_country
							ON CONFLICT DO NOTHING
							""", family, prefix,
						)

				# Move the parsed ASes from the staging tables into autnums,
				# resolving the organisation handle to the organisation name
				self.db.execute("""
					INSERT INTO
						autnums
					(
						number,
						name,
						source
					)
					SELECT
						_autnums.number,
						_organizations.name,
						_organizations.source
					FROM
						_autnums
					JOIN
						_organizations ON _autnums.organization = _organizations.handle
					ON CONFLICT
					(
						number
					)
					DO UPDATE
						SET name = excluded.name
					""",
				)

		# Download and import (technical) AS names from ARIN
		with self.db.transaction():
			self._import_as_names_from_arin(downloader)

		# Return a non-zero exit code for errors
		return 1 if error else 0
877
bd341642
PM
878 def _check_parsed_network(self, network):
879 """
880 Assistive function to detect and subsequently sort out parsed
881 networks from RIR data (both Whois and so-called "extended sources"),
882 which are or have...
883
884 (a) not globally routable (RFC 1918 space, et al.)
885 (b) covering a too large chunk of the IP address space (prefix length
886 is < 7 for IPv4 networks, and < 10 for IPv6)
887 (c) "0.0.0.0" or "::" as a network address
888 (d) are too small for being publicly announced (we have decided not to
889 process them at the moment, as they significantly enlarge our
890 database without providing very helpful additional information)
891
892 This unfortunately is necessary due to brain-dead clutter across
893 various RIR databases, causing mismatches and eventually disruptions.
894
895 We will return False in case a network is not suitable for adding
896 it to our database, and True otherwise.
897 """
898
899 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
900 return False
901
902 if not network.is_global:
2ba6ed07 903 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
904 return False
905
906 if network.version == 4:
907 if network.prefixlen < 7:
2ba6ed07 908 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
909 return False
910
911 if network.prefixlen > 24:
ebb087cf 912 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
913 return False
914
915 if str(network.network_address) == "0.0.0.0":
2ba6ed07 916 log.debug("Skipping network based on 0.0.0.0: %s" % network)
bd341642
PM
917 return False
918
919 elif network.version == 6:
920 if network.prefixlen < 10:
2ba6ed07 921 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
922 return False
923
924 if network.prefixlen > 48:
ebb087cf 925 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
926 return False
927
928 if str(network.network_address) == "::":
2ba6ed07 929 log.debug("Skipping network based on '::': %s" % network)
bd341642
PM
930 return False
931
932 else:
933 # This should not happen...
84187ab5 934 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
bd341642
PM
935 return False
936
937 # In case we have made it here, the network is considered to
938 # be suitable for libloc consumption...
939 return True
940
43fe570c
PM
941 def _check_parsed_asn(self, asn):
942 """
943 Assistive function to filter Autonomous System Numbers not being suitable
944 for adding to our database. Returns False in such cases, and True otherwise.
945 """
946
947 for start, end in VALID_ASN_RANGES:
948 if start <= asn and end >= asn:
949 return True
950
951 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
952 return False
953
28b08385 954 def _parse_block(self, block, source_key, validcountries = None):
6ffd06b5
MT
955 # Get first line to find out what type of block this is
956 line = block[0]
957
6ffd06b5 958 # aut-num
429a43d1 959 if line.startswith("aut-num:"):
28b08385 960 return self._parse_autnum_block(block, source_key)
6ffd06b5 961
aadac4c5
PM
962 # inetnum
963 if line.startswith("inet6num:") or line.startswith("inetnum:"):
28b08385 964 return self._parse_inetnum_block(block, source_key, validcountries)
aadac4c5 965
6ffd06b5
MT
966 # organisation
967 elif line.startswith("organisation:"):
28b08385 968 return self._parse_org_block(block, source_key)
6ffd06b5 969
28b08385 970 def _parse_autnum_block(self, block, source_key):
6ffd06b5
MT
971 autnum = {}
972 for line in block:
973 # Split line
974 key, val = split_line(line)
975
976 if key == "aut-num":
977 m = re.match(r"^(AS|as)(\d+)", val)
978 if m:
979 autnum["asn"] = m.group(2)
980
0365119d 981 elif key == "org":
e7d612e5 982 autnum[key] = val.upper()
6ffd06b5 983
426e0bee
PM
984 elif key == "descr":
985 # Save the first description line as well...
986 if not key in autnum:
987 autnum[key] = val
988
6ffd06b5 989 # Skip empty objects
426e0bee 990 if not autnum or not "asn" in autnum:
6ffd06b5
MT
991 return
992
426e0bee
PM
993 # Insert a dummy organisation handle into our temporary organisations
994 # table in case the AS does not have an organisation handle set, but
995 # has a description (a quirk often observed in APNIC area), so we can
996 # later display at least some string for this AS.
997 if not "org" in autnum:
998 if "descr" in autnum:
999 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
1000
1001 self.db.execute("INSERT INTO _organizations(handle, name, source) \
1002 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
1003 autnum.get("org"), autnum.get("descr"), source_key,
1004 )
1005 else:
1006 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
1007 autnum.get("asn"))
1008 return
1009
6ffd06b5 1010 # Insert into database
28b08385
PM
1011 self.db.execute("INSERT INTO _autnums(number, organization, source) \
1012 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
0365119d 1013 organization = excluded.organization",
28b08385 1014 autnum.get("asn"), autnum.get("org"), source_key,
6ffd06b5
MT
1015 )
1016
28b08385 1017 def _parse_inetnum_block(self, block, source_key, validcountries = None):
84187ab5 1018 log.debug("Parsing inetnum block:")
aadac4c5
PM
1019
1020 inetnum = {}
1021 for line in block:
84187ab5 1022 log.debug(line)
aadac4c5
PM
1023
1024 # Split line
1025 key, val = split_line(line)
1026
84187ab5
PM
1027 # Filter any inetnum records which are only referring to IP space
1028 # not managed by that specific RIR...
1029 if key == "netname":
5254e5fc 1030 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
2ba6ed07 1031 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
84187ab5
PM
1032 return
1033
aadac4c5
PM
1034 if key == "inetnum":
1035 start_address, delim, end_address = val.partition("-")
1036
1037 # Strip any excess space
1038 start_address, end_address = start_address.rstrip(), end_address.strip()
1039
845da577
PM
1040 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
1041 if start_address and not (delim or end_address):
1042 try:
1043 start_address = ipaddress.ip_network(start_address, strict=False)
1044 except ValueError:
1045 start_address = start_address.split("/")
38d0faea 1046 ldigits = start_address[0].count(".")
845da577
PM
1047
1048 # How many octets do we need to add?
1049 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
2ad36187 1050 if ldigits == 1:
845da577 1051 start_address = start_address[0] + ".0.0/" + start_address[1]
2ad36187 1052 elif ldigits == 2:
845da577
PM
1053 start_address = start_address[0] + ".0/" + start_address[1]
1054 else:
1055 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
1056 return
1057
1058 try:
1059 start_address = ipaddress.ip_network(start_address, strict=False)
1060 except ValueError:
1061 log.warning("Could not parse line in LACNIC DB format: %s" % line)
1062 return
1063
1064 # Enumerate first and last IP address of this network
1065 end_address = start_address[-1]
1066 start_address = start_address[0]
1067
1068 else:
1069 # Convert to IP address
1070 try:
1071 start_address = ipaddress.ip_address(start_address)
1072 end_address = ipaddress.ip_address(end_address)
1073 except ValueError:
1074 log.warning("Could not parse line: %s" % line)
1075 return
aadac4c5 1076
1814283b 1077 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
aadac4c5
PM
1078
1079 elif key == "inet6num":
1814283b 1080 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
aadac4c5
PM
1081
1082 elif key == "country":
7434e5e0
PM
1083 val = val.upper()
1084
b6b2b331 1085 # Catch RIR data objects with more than one country code...
7434e5e0 1086 if not key in inetnum:
b6b2b331
PM
1087 inetnum[key] = []
1088 else:
7434e5e0 1089 if val in inetnum.get("country"):
b6b2b331
PM
1090 # ... but keep this list distinct...
1091 continue
1092
01e5f0ff
PM
1093 # When people set country codes to "UK", they actually mean "GB"
1094 if val == "UK":
1095 val = "GB"
1096
7434e5e0 1097 inetnum[key].append(val)
aadac4c5 1098
183b2f74
MT
1099 # Parse the geofeed attribute
1100 elif key == "geofeed":
1101 inetnum["geofeed"] = val
1102
1103 # Parse geofeed when used as a remark
45c0536c
MT
1104 elif key == "remarks":
1105 m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
183b2f74
MT
1106 if m:
1107 inetnum["geofeed"] = m.group(1)
1108
aadac4c5 1109 # Skip empty objects
002deb6b 1110 if not inetnum or not "country" in inetnum:
aadac4c5
PM
1111 return
1112
b6b2b331
PM
1113 # Prepare skipping objects with unknown country codes...
1114 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
1115
1814283b
PM
1116 # Iterate through all networks enumerated from above, check them for plausibility and insert
1117 # them into the database, if _check_parsed_network() succeeded
1118 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
1119 if self._check_parsed_network(single_network):
b6b2b331
PM
1120 # Skip objects with unknown country codes if they are valid to avoid log spam...
1121 if validcountries and invalidcountries:
1122 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
1123 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
48770ca8 1124 break
7138b4ac
PM
1125
1126 # Everything is fine here, run INSERT statement...
b6b2b331
PM
1127 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
1128 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
1129 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
1814283b 1130 )
aadac4c5 1131
183b2f74
MT
1132 # Update any geofeed information
1133 geofeed = inetnum.get("geofeed", None)
183b2f74 1134 if geofeed:
da832d44 1135 self._parse_geofeed(geofeed, single_network)
183b2f74
MT
1136
1137 # Delete any previous geofeeds
1138 else:
1139 self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
1140 "%s" % single_network)
1141
da832d44
MT
1142 def _parse_geofeed(self, url, single_network):
1143 # Parse the URL
1144 url = urllib.parse.urlparse(url)
1145
1146 # Make sure that this is a HTTPS URL
1147 if not url.scheme == "https":
1148 log.debug("Geofeed URL is not using HTTPS: %s" % geofeed)
1149 return
1150
1151 # Put the URL back together normalized
1152 url = url.geturl()
1153
1154 # Store/update any geofeeds
1155 self.db.execute("""
1156 INSERT INTO
1157 network_geofeeds(
1158 network,
1159 url
1160 )
1161 VALUES(
1162 %s, %s
1163 )
1164 ON CONFLICT (network) DO
1165 UPDATE SET url = excluded.url""",
1166 "%s" % single_network, url,
1167 )
1168
28b08385 1169 def _parse_org_block(self, block, source_key):
6ffd06b5
MT
1170 org = {}
1171 for line in block:
1172 # Split line
1173 key, val = split_line(line)
1174
e7d612e5
PM
1175 if key == "organisation":
1176 org[key] = val.upper()
1177 elif key == "org-name":
6ffd06b5
MT
1178 org[key] = val
1179
1180 # Skip empty objects
1181 if not org:
1182 return
1183
28b08385
PM
1184 self.db.execute("INSERT INTO _organizations(handle, name, source) \
1185 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
0365119d 1186 UPDATE SET name = excluded.name",
28b08385 1187 org.get("organisation"), org.get("org-name"), source_key,
6ffd06b5
MT
1188 )
1189
28b08385 1190 def _parse_line(self, line, source_key, validcountries = None):
429a43d1
MT
1191 # Skip version line
1192 if line.startswith("2"):
1193 return
6ffd06b5 1194
429a43d1
MT
1195 # Skip comments
1196 if line.startswith("#"):
1197 return
6ffd06b5 1198
429a43d1
MT
1199 try:
1200 registry, country_code, type, line = line.split("|", 3)
1201 except:
1202 log.warning("Could not parse line: %s" % line)
1203 return
6ffd06b5 1204
84b175e2
PM
1205 # Skip any lines that are for stats only or do not have a country
1206 # code at all (avoids log spam below)
1207 if not country_code or country_code == '*':
1208 return
1209
1210 # Skip objects with unknown country codes
1211 if validcountries and country_code not in validcountries:
1212 log.warning("Skipping line with bogus country '%s': %s" % \
1213 (country_code, line))
6ffd06b5
MT
1214 return
1215
429a43d1 1216 if type in ("ipv6", "ipv4"):
28b08385 1217 return self._parse_ip_line(country_code, type, line, source_key)
429a43d1 1218
28b08385 1219 def _parse_ip_line(self, country, type, line, source_key):
429a43d1
MT
1220 try:
1221 address, prefix, date, status, organization = line.split("|")
1222 except ValueError:
1223 organization = None
1224
1225 # Try parsing the line without organization
1226 try:
1227 address, prefix, date, status = line.split("|")
1228 except ValueError:
1229 log.warning("Unhandled line format: %s" % line)
1230 return
1231
1232 # Skip anything that isn't properly assigned
1233 if not status in ("assigned", "allocated"):
1234 return
1235
1236 # Cast prefix into an integer
1237 try:
1238 prefix = int(prefix)
1239 except:
1240 log.warning("Invalid prefix: %s" % prefix)
7177031f 1241 return
429a43d1
MT
1242
1243 # Fix prefix length for IPv4
1244 if type == "ipv4":
1245 prefix = 32 - int(math.log(prefix, 2))
1246
1247 # Try to parse the address
1248 try:
1249 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
1250 except ValueError:
1251 log.warning("Invalid IP address: %s" % address)
1252 return
1253
bd341642
PM
1254 if not self._check_parsed_network(network):
1255 return
1256
b6b2b331
PM
1257 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
1258 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
87b3e102 1259 UPDATE SET country = excluded.country",
b6b2b331 1260 "%s" % network, country, [country], source_key,
6ffd06b5
MT
1261 )
1262
18a72eac
MT
1263 def _import_as_names_from_arin(self, downloader):
1264 # Delete all previously imported content
1265 self.db.execute("DELETE FROM autnums WHERE source = %s", "ARIN")
92403f39 1266
18a72eac
MT
1267 # Try to retrieve the feed from ftp.arin.net
1268 feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
f4f3d8ad 1269
18a72eac
MT
1270 # Walk through the file
1271 for line in csv.DictReader(feed, dialect="arin"):
1272 log.debug("Processing object: %s" % line)
92403f39 1273
18a72eac
MT
1274 # Fetch status
1275 status = line.get("Status")
92403f39 1276
18a72eac
MT
1277 # We are only interested in anything managed by ARIN
1278 if not status == "Full Registry Services":
85e44d9a 1279 continue
92403f39 1280
18a72eac
MT
1281 # Fetch organization name
1282 name = line.get("Org Name")
92403f39 1283
18a72eac
MT
1284 # Extract ASNs
1285 first_asn = line.get("Start AS Number")
1286 last_asn = line.get("End AS Number")
f4f3d8ad 1287
18a72eac
MT
1288 # Cast to a number
1289 try:
1290 first_asn = int(first_asn)
1291 except TypeError as e:
1292 log.warning("Could not parse ASN '%s'" % first_asn)
85e44d9a 1293 continue
92403f39 1294
18a72eac
MT
1295 try:
1296 last_asn = int(last_asn)
1297 except TypeError as e:
1298 log.warning("Could not parse ASN '%s'" % last_asn)
f4f3d8ad 1299 continue
92403f39 1300
18a72eac
MT
1301 # Check if the range is valid
1302 if last_asn < first_asn:
1303 log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
1304
1305 # Insert everything into the database
1306 for asn in range(first_asn, last_asn + 1):
1307 if not self._check_parsed_asn(asn):
1308 log.warning("Skipping invalid ASN %s" % asn)
1309 continue
1310
1311 self.db.execute("""
1312 INSERT INTO
1313 autnums
1314 (
1315 number,
1316 name,
1317 source
1318 )
1319 VALUES
1320 (
1321 %s, %s, %s
1322 )
1323 ON CONFLICT
1324 (
1325 number
1326 )
1327 DO NOTHING
1328 """, asn, name, "ARIN",
1329 )
92403f39 1330
83d61c46
MT
	def handle_update_announcements(self, ns):
		"""
		Update the announcements table from a routing daemon.

		ns.server must contain the address of the daemon; a leading slash is
		interpreted as the path to a local Bird UNIX control socket (the only
		supported transport here). Afterwards, routes that should never end
		up in the database (bogons, private/reserved space, too-specific
		prefixes, non-public ASNs, stale entries) are purged.
		"""
		server = ns.server[0]

		with self.db.transaction():
			# A leading slash indicates a path to a local UNIX socket (Bird)
			if server.startswith("/"):
				self._handle_update_announcements_from_bird(server)

			# Purge anything we never want here
			self.db.execute("""
				-- Delete default routes
				DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

				-- Delete anything that is not global unicast address space
				DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

				-- DELETE "current network" address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

				-- DELETE local loopback address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

				-- DELETE RFC 1918 address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

				-- DELETE test, benchmark and documentation address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

				-- DELETE CGNAT address space (RFC 6598)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

				-- DELETE link local address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

				-- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
				DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

				-- DELETE multicast and reserved address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

				-- Delete networks that are too small to be in the global routing table
				DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
				DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

				-- Delete any non-public or reserved ASNs
				DELETE FROM announcements WHERE NOT (
					(autnum >= 1 AND autnum <= 23455)
				OR
					(autnum >= 23457 AND autnum <= 64495)
				OR
					(autnum >= 131072 AND autnum <= 4199999999)
				);

				-- Delete everything that we have not seen for 14 days
				DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
			""")
1394
1395 def _handle_update_announcements_from_bird(self, server):
1396 # Pre-compile the regular expression for faster searching
35e5f041 1397 route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
22d8d199
MT
1398
1399 log.info("Requesting routing table from Bird (%s)" % server)
1400
35e5f041
MT
1401 aggregated_networks = []
1402
22d8d199
MT
1403 # Send command to list all routes
1404 for line in self._bird_cmd(server, "show route"):
1405 m = route.match(line)
1406 if not m:
b767db8d
MT
1407 # Skip empty lines
1408 if not line:
1409 pass
1410
45fa97cd 1411 # Ignore any header lines with the name of the routing table
b767db8d
MT
1412 elif line.startswith(b"Table"):
1413 pass
45fa97cd
MT
1414
1415 # Log anything else
b767db8d
MT
1416 else:
1417 log.debug("Could not parse line: %s" % line.decode())
1418
22d8d199
MT
1419 continue
1420
1421 # Fetch the extracted network and ASN
1422 network, autnum = m.groups()
1423
35e5f041
MT
1424 # Decode into strings
1425 if network:
1426 network = network.decode()
1427 if autnum:
1428 autnum = autnum.decode()
1429
1430 # Collect all aggregated networks
1431 if not autnum:
1432 log.debug("%s is an aggregated network" % network)
1433 aggregated_networks.append(network)
1434 continue
1435
22d8d199
MT
1436 # Insert it into the database
1437 self.db.execute("INSERT INTO announcements(network, autnum) \
1438 VALUES(%s, %s) ON CONFLICT (network) DO \
1439 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
35e5f041 1440 network, autnum,
22d8d199
MT
1441 )
1442
35e5f041
MT
1443 # Process any aggregated networks
1444 for network in aggregated_networks:
1445 log.debug("Processing aggregated network %s" % network)
1446
1447 # Run "show route all" for each network
1448 for line in self._bird_cmd(server, "show route %s all" % network):
1449 # Try finding the path
1450 m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1451 if m:
1452 # Select the last AS number in the path
1453 autnum = m.group(1).decode()
1454
1455 # Insert it into the database
1456 self.db.execute("INSERT INTO announcements(network, autnum) \
1457 VALUES(%s, %s) ON CONFLICT (network) DO \
1458 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1459 network, autnum,
1460 )
1461
1462 # We don't need to process any more
1463 break
1464
22d8d199
MT
1465 def _bird_cmd(self, socket_path, command):
1466 # Connect to the socket
1467 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1468 s.connect(socket_path)
cedee656 1469
22d8d199
MT
1470 # Allocate some buffer
1471 buffer = b""
83d61c46 1472
bbcefb38
MT
1473 log.debug("Sending Bird command: %s" % command)
1474
22d8d199
MT
1475 # Send the command
1476 s.send(b"%s\n" % command.encode())
209c04b6 1477
22d8d199
MT
1478 while True:
1479 # Fill up the buffer
1480 buffer += s.recv(4096)
209c04b6 1481
22d8d199
MT
1482 while True:
1483 # Search for the next newline
1484 pos = buffer.find(b"\n")
209c04b6 1485
22d8d199
MT
1486 # If we cannot find one, we go back and read more data
1487 if pos <= 0:
1488 break
209c04b6 1489
22d8d199
MT
1490 # Cut after the newline character
1491 pos += 1
b89cee80 1492
22d8d199
MT
1493 # Split the line we want and keep the rest in buffer
1494 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1495
bbcefb38
MT
1496 # Try parsing any status lines
1497 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1498 code, delim, line = int(line[:4]), line[4], line[5:]
1499
1500 log.debug("Received response code %s from bird" % code)
1501
1502 # End of output
1503 if code == 0:
1504 return
1505
1506 # Ignore hello line
1507 elif code == 1:
1508 continue
83d61c46 1509
22d8d199
MT
1510 # Otherwise return the line
1511 yield line
83d61c46 1512
	def handle_update_geofeeds(self, ns):
		"""
		Refresh all Geofeed data.

		First synchronises the geofeeds table with the URLs referenced from
		network_geofeeds, then fetches any feeds that are new or older than
		one week (ten downloads in parallel), and finally drops networks of
		feeds that could not be updated for more than two weeks.
		"""
		# Sync geofeeds
		with self.db.transaction():
			# Delete all geofeeds which are no longer linked
			self.db.execute("""
				DELETE FROM
					geofeeds
				WHERE
					NOT EXISTS (
						SELECT
							1
						FROM
							network_geofeeds
						WHERE
							geofeeds.url = network_geofeeds.url
					)""",
			)

			# Copy all geofeeds
			self.db.execute("""
				INSERT INTO
					geofeeds(
						url
					)
				SELECT
					url
				FROM
					network_geofeeds
				ON CONFLICT (url)
				DO NOTHING
				""",
			)

		# Fetch all Geofeeds that require an update
		geofeeds = self.db.query("""
			SELECT
				id,
				url
			FROM
				geofeeds
			WHERE
				updated_at IS NULL
			OR
				updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
			ORDER BY
				id
			""")

		# Download the feeds in parallel
		with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
			results = executor.map(self._fetch_geofeed, geofeeds)

			# Fetch all results to raise any exceptions
			for result in results:
				pass

		# Delete data from any feeds that did not update in the last two weeks
		with self.db.transaction():
			self.db.execute("""
				DELETE FROM
					geofeed_networks
				WHERE
					geofeed_networks.geofeed_id IN (
						SELECT
							geofeeds.id
						FROM
							geofeeds
						WHERE
							updated_at IS NULL
						OR
							updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
					)
				""")
1585
0fcdd689
MT
	def _fetch_geofeed(self, geofeed):
		"""
		Download one Geofeed (CSV, RFC 8805 style) and replace its networks
		in the geofeed_networks table.

		geofeed is a database row with "id" and "url" attributes. HTTP errors
		are recorded in the geofeeds table (a 404 additionally wipes previous
		data); on success the feed is marked as updated. Runs in one database
		transaction per feed and is called from a thread pool.
		"""
		log.debug("Fetching Geofeed %s" % geofeed.url)

		with self.db.transaction():
			# Open the URL
			try:
				req = urllib.request.Request(geofeed.url, headers={
					"User-Agent" : "location/%s" % location.__version__,

					# We expect some plain text file in CSV format
					"Accept" : "text/csv, text/plain",
				})

				# XXX set proxy

				# Send the request
				with urllib.request.urlopen(req, timeout=10) as f:
					# Remove any previous data
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

					# Line counter for log messages
					lineno = 0

					# Read the output line by line
					for line in f:
						lineno += 1

						try:
							line = line.decode()

						# Ignore any lines we cannot decode
						except UnicodeDecodeError:
							log.debug("Could not decode line %s in %s" \
								% (lineno, geofeed.url))
							continue

						# Strip any newline
						line = line.rstrip()

						# Skip empty lines
						if not line:
							continue

						# Try to parse the line
						try:
							fields = line.split(",", 5)
						except ValueError:
							log.debug("Could not parse line: %s" % line)
							continue

						# Check if we have enough fields
						if len(fields) < 4:
							log.debug("Not enough fields in line: %s" % line)
							continue

						# Fetch all fields (network,country,region,city,...)
						network, country, region, city, = fields[:4]

						# Try to parse the network
						try:
							network = ipaddress.ip_network(network, strict=False)
						except ValueError:
							log.debug("Could not parse network: %s" % network)
							continue

						# Strip any excess whitespace from country codes
						country = country.strip()

						# Make the country code uppercase
						country = country.upper()

						# Check the country code
						if not country:
							log.debug("Empty country code in Geofeed %s line %s" \
								% (geofeed.url, lineno))
							continue

						elif not location.country_code_is_valid(country):
							log.debug("Invalid country code in Geofeed %s:%s: %s" \
								% (geofeed.url, lineno, country))
							continue

						# Write this into the database
						self.db.execute("""
							INSERT INTO
								geofeed_networks (
									geofeed_id,
									network,
									country,
									region,
									city
								)
							VALUES (%s, %s, %s, %s, %s)""",
							geofeed.id,
							"%s" % network,
							country,
							region,
							city,
						)

			# Catch any HTTP errors and store code and message with the feed
			except urllib.request.HTTPError as e:
				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", e.code, "%s" % e, geofeed.id)

				# Remove any previous data when the feed has been deleted
				if e.code == 404:
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

			# Catch any other errors and connection timeouts
			except (http.client.InvalidURL, urllib.request.URLError, TimeoutError) as e:
				log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))

				# 599 is used as a generic "network error" status here
				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", 599, "%s" % e, geofeed.id)

			# Mark the geofeed as updated
			else:
				self.db.execute("""
					UPDATE
						geofeeds
					SET
						updated_at = CURRENT_TIMESTAMP,
						status = NULL,
						error = NULL
					WHERE
						id = %s""",
					geofeed.id,
				)
1716
d7fc3057
MT
	def handle_update_overrides(self, ns):
		"""
		Re-import manual override files.

		Reads all files given in ns.files (RPSL-like blocks via
		location.importer.read_blocks) and stores "net" blocks in
		network_overrides and "aut-num" blocks in autnum_overrides. Only
		previously imported overrides with source 'manual' are dropped first.
		"""
		with self.db.transaction():
			# Only drop manually created overrides, as we can be reasonably sure to have them,
			# and preserve the rest. If appropriate, it is deleted by correspondent functions.
			self.db.execute("""
				DELETE FROM autnum_overrides WHERE source = 'manual';
				DELETE FROM network_overrides WHERE source = 'manual';
			""")

			for file in ns.files:
				log.info("Reading %s..." % file)

				with open(file, "rb") as f:
					for type, block in location.importer.read_blocks(f):
						if type == "net":
							network = block.get("net")
							# Try to parse and normalise the network
							try:
								network = ipaddress.ip_network(network, strict=False)
							except ValueError as e:
								log.warning("Invalid IP network: %s: %s" % (network, e))
								continue

							# Prevent that we overwrite all networks
							if network.prefixlen == 0:
								log.warning("Skipping %s: You cannot overwrite default" % network)
								continue

							self.db.execute("""
								INSERT INTO network_overrides(
									network,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES (%s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT (network) DO NOTHING""",
								"%s" % network,
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						elif type == "aut-num":
							autnum = block.get("aut-num")

							# Check if AS number begins with "AS"
							if not autnum.startswith("AS"):
								log.warning("Invalid AS number: %s" % autnum)
								continue

							# Strip "AS"
							autnum = autnum[2:]

							self.db.execute("""
								INSERT INTO autnum_overrides(
									number,
									name,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT DO NOTHING""",
								autnum,
								block.get("name"),
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						else:
							log.warning("Unsupported type: %s" % type)
d7fc3057 1800
93aad7f7
MT
1801 def handle_update_feeds(self, ns):
1802 """
1803 Update any third-party feeds
1804 """
a775372b
MT
1805 success = True
1806
1807 # Create a downloader
1808 downloader = location.importer.Downloader()
1809
1810 feeds = (
1811 # AWS IP Ranges
1812 ("AWS-IP-RANGES", self._import_aws_ip_ranges, "https://ip-ranges.amazonaws.com/ip-ranges.json"),
377cead4
MT
1813
1814 # Spamhaus DROP
1815 ("SPAMHAUS-DROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/drop.txt"),
1816 ("SPAMHAUS-EDROP", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/edrop.txt"),
1817 ("SPAMHAUS-DROPV6", self._import_spamhaus_drop, "https://www.spamhaus.org/drop/dropv6.txt"),
a775372b
MT
1818 )
1819
1820 # Walk through all feeds
1821 for name, callback, url, *args in feeds:
f7f8e714
MT
1822 # Skip any feeds that were not requested on the command line
1823 if ns.feeds and not name in ns.feeds:
1824 continue
1825
a775372b
MT
1826 try:
1827 self._process_feed(downloader, name, callback, url, *args)
1828
1829 # Log an error but continue if an exception occurs
1830 except Exception as e:
1831 log.error("Error processing feed '%s': %s" % (name, e))
1832 success = False
93aad7f7
MT
1833
1834 # Spamhaus
377cead4 1835 #self._update_feed_for_spamhaus_drop()
93aad7f7 1836
a775372b
MT
1837 # Return status
1838 return 0 if success else 1
dcef2ba4 1839
a775372b
MT
1840 def _process_feed(self, downloader, name, callback, url, *args):
1841 """
1842 Processes one feed
1843 """
1844 # Open the URL
1845 f = downloader.retrieve(url)
85e44d9a 1846
a775372b
MT
1847 with self.db.transaction():
1848 # Drop any previous content
1849 self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
1850 self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
dcef2ba4 1851
a775372b
MT
1852 # Call the callback to process the feed
1853 return callback(name, f, *args)
1854
1855 def _import_aws_ip_ranges(self, name, f):
1856 # Parse the feed
c30e421e 1857 feed = json.load(f)
68fdeb3b 1858
99e4a4eb 1859 # Set up a dictionary for mapping a region name to a country. Unfortunately,
dcef2ba4
PM
1860 # there seems to be no machine-readable version available of this other than
1861 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1862 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1863 # was helpful here as well.
1864 aws_region_country_map = {
99e4a4eb
MT
1865 # Africa
1866 "af-south-1" : "ZA",
1867
1868 # Asia
1869 "il-central-1" : "IL", # Tel Aviv
1870
1871 # Asia/Pacific
1872 "ap-northeast-1" : "JP",
1873 "ap-northeast-2" : "KR",
1874 "ap-northeast-3" : "JP",
1875 "ap-east-1" : "HK",
1876 "ap-south-1" : "IN",
1877 "ap-south-2" : "IN",
1878 "ap-southeast-1" : "SG",
1879 "ap-southeast-2" : "AU",
1880 "ap-southeast-3" : "MY",
1881 "ap-southeast-4" : "AU",
1882 "ap-southeast-5" : "NZ", # Auckland, NZ
1883 "ap-southeast-6" : "AP", # XXX: Precise location not documented anywhere
1884
1885 # Canada
1886 "ca-central-1" : "CA",
1887 "ca-west-1" : "CA",
1888
1889 # Europe
1890 "eu-central-1" : "DE",
1891 "eu-central-2" : "CH",
1892 "eu-north-1" : "SE",
1893 "eu-west-1" : "IE",
1894 "eu-west-2" : "GB",
1895 "eu-west-3" : "FR",
1896 "eu-south-1" : "IT",
1897 "eu-south-2" : "ES",
1898
1899 # Middle East
1900 "me-central-1" : "AE",
1901 "me-south-1" : "BH",
1902
1903 # South America
1904 "sa-east-1" : "BR",
1905
1906 # Undocumented, likely located in Berlin rather than Frankfurt
1907 "eusc-de-east-1" : "DE",
1908 }
dcef2ba4 1909
c30e421e
MT
1910 # Collect a list of all networks
1911 prefixes = feed.get("ipv6_prefixes", []) + feed.get("prefixes", [])
dcef2ba4 1912
c30e421e
MT
1913 for prefix in prefixes:
1914 # Fetch network
1915 network = prefix.get("ipv6_prefix") or prefix.get("ip_prefix")
1916
1917 # Parse the network
a775372b 1918 try:
c30e421e
MT
1919 network = ipaddress.ip_network(network)
1920 except ValuleError as e:
1921 log.warning("%s: Unable to parse prefix %s" % (name, network))
a775372b 1922 continue
dcef2ba4 1923
a775372b
MT
1924 # Sanitize parsed networks...
1925 if not self._check_parsed_network(network):
1926 continue
dcef2ba4 1927
c30e421e
MT
1928 # Fetch the region
1929 region = prefix.get("region")
1930
1931 # Set some defaults
a775372b
MT
1932 cc = None
1933 is_anycast = False
1934
c30e421e
MT
1935 # Fetch the CC from the dictionary
1936 try:
a775372b 1937 cc = aws_region_country_map[region]
dcef2ba4 1938
c30e421e
MT
1939 # If we couldn't find anything, let's try something else...
1940 except KeyError as e:
1941 # Find anycast networks
1942 if region == "GLOBAL":
1943 is_anycast = True
1944
1945 # Everything that starts with us- is probably in the United States
1946 elif region.startswith("us-"):
1947 cc = "US"
1948
1949 # Everything that starts with cn- is probably China
1950 elif region.startswith("cn-"):
1951 cc = "CN"
1952
1953 # Log a warning for anything else
1954 else:
1955 log.warning("%s: Could not determine country code for AWS region %s" \
1956 % (name, region))
1957 continue
dcef2ba4 1958
c30e421e 1959 # Write to database
a775372b
MT
1960 self.db.execute("""
1961 INSERT INTO
1962 network_feeds
1963 (
1964 network,
1965 source,
1966 country,
1967 is_anycast
dcef2ba4 1968 )
a775372b
MT
1969 VALUES
1970 (
1971 %s, %s, %s, %s
1972 )
1973 ON CONFLICT (network, source) DO NOTHING
c30e421e 1974 """, "%s" % network, name, cc, is_anycast,
a775372b 1975 )
dcef2ba4 1976
377cead4
MT
1977 def _import_spamhaus_drop(self, name, f):
1978 """
1979 Import Spamhaus DROP IP feeds
1980 """
1981 # Count all lines
1982 lines = 0
69b3d894 1983
377cead4
MT
1984 # Walk through all lines
1985 for line in f:
1986 # Decode line
1987 line = line.decode("utf-8")
69b3d894 1988
377cead4
MT
1989 # Strip off any comments
1990 line, _, comment = line.partition(";")
69b3d894 1991
377cead4
MT
1992 # Ignore empty lines
1993 if not line:
1994 continue
85e44d9a 1995
377cead4
MT
1996 # Strip any excess whitespace
1997 line = line.strip()
69b3d894 1998
377cead4
MT
1999 # Increment line counter
2000 lines += 1
5acac2a4 2001
377cead4
MT
2002 # Parse the network
2003 try:
2004 network = ipaddress.ip_network(line)
2005 except ValueError as e:
2006 log.warning("%s: Could not parse network: %s - %s" % (name, line, e))
2007 continue
69b3d894 2008
377cead4
MT
2009 # Check network
2010 if not self._check_parsed_network(network):
2011 log.warning("%s: Skipping bogus network: %s" % (name, network))
2012 continue
69b3d894 2013
377cead4
MT
2014 # Insert into the database
2015 self.db.execute("""
2016 INSERT INTO
2017 network_feeds
2018 (
2019 network,
2020 source,
2021 is_drop
2022 )
2023 VALUES
2024 (
2025 %s, %s, %s
2026 )""", "%s" % network, name, True,
2027 )
69b3d894 2028
377cead4
MT
2029 # Raise an exception if we could not import anything
2030 if not lines:
2031 raise RuntimeError("Received bogus feed %s with no data" % name)
69b3d894 2032
377cead4
MT
	def _update_feed_for_spamhaus_drop(self):
		"""
		Import the Spamhaus ASN-DROP feed (JSON lines) into autnum_feeds.

		Previously imported rows are only dropped when the downloaded file
		looks plausible (more than ten lines), to guard against CDN issues
		serving truncated/bogus content.
		"""
		downloader = location.importer.Downloader()

		# Feeds to fetch: (source name, URL)
		asn_lists = [
			("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json")
		]

		for name, url in asn_lists:
			# Fetch URL
			f = downloader.retrieve(url)

			# Split into lines
			fcontent = f.readlines()

			with self.db.transaction():
				# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
				# downloads.
				if len(fcontent) > 10:
					self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
				else:
					log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
					continue

				# Iterate through every line, filter comments and add remaining ASNs to
				# the override table in case they are valid...
				for sline in fcontent:
					# The response is assumed to be encoded in UTF-8...
					sline = sline.decode("utf-8")

					# Load every line as a JSON object and try to obtain an ASN from it...
					try:
						lineobj = json.loads(sline)
					except json.decoder.JSONDecodeError:
						log.error("Unable to parse line as a JSON object: %s" % sline)
						continue

					# Skip line containing file metadata
					try:
						type = lineobj["type"]

						if type == "metadata":
							continue
					except KeyError:
						pass

					# Every data line must carry an ASN and a name
					try:
						asn = lineobj["asn"]
						as_name = lineobj["asname"]
					except KeyError:
						log.warning("Unable to extract necessary information from line: %s" % sline)
						continue

					# Filter invalid ASNs...
					if not self._check_parsed_asn(asn):
						log.warning("Skipping bogus ASN found in %s (%s): %s" % \
							(name, url, asn))
						continue

					# Conduct SQL statement...
					self.db.execute("""
						INSERT INTO
							autnum_feeds
						(
							number,
							source,
							is_drop
						)
						VALUES
						(
							%s, %s, %s
						)""", "%s" % asn, name, True,
					)
2105
28d29b7c
MT
2106 @staticmethod
2107 def _parse_bool(block, key):
2108 val = block.get(key)
2109
2110 # There is no point to proceed when we got None
2111 if val is None:
2112 return
2113
2114 # Convert to lowercase
2115 val = val.lower()
2116
2117 # True
2118 if val in ("yes", "1"):
2119 return True
2120
2121 # False
2122 if val in ("no", "0"):
2123 return False
2124
2125 # Default to None
2126 return None
2127
8084b33a
MT
2128 def handle_import_countries(self, ns):
2129 with self.db.transaction():
2130 # Drop all data that we have
2131 self.db.execute("TRUNCATE TABLE countries")
2132
2133 for file in ns.file:
2134 for line in file:
2135 line = line.rstrip()
2136
2137 # Ignore any comments
2138 if line.startswith("#"):
2139 continue
2140
2141 try:
2142 country_code, continent_code, name = line.split(maxsplit=2)
2143 except:
2144 log.warning("Could not parse line: %s" % line)
2145 continue
2146
2147 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
2148 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
2149
6ffd06b5
MT
2150
def split_line(line):
	"""
		Splits a "key: value" line at the first colon and returns the
		(key, value) pair with surrounding whitespace stripped.
	"""
	key, _, value = line.partition(":")

	return key.strip(), value.strip()
78ff0cf2
MT
2159
def main():
	# Run the command line interface
	c = CLI()
	c.run()

# Guard the entry point so the module can be imported (e.g. for testing)
# without immediately running the importer; direct execution is unchanged
if __name__ == "__main__":
	main()