]> git.ipfire.org Git - people/ms/libloc.git/blame - src/scripts/location-importer.in
importer: Import Spamhaus DROP feeds into the new feeds table
[people/ms/libloc.git] / src / scripts / location-importer.in
CommitLineData
78ff0cf2
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
5c7cfeb2 6# Copyright (C) 2020-2024 IPFire Development Team <info@ipfire.org> #
78ff0cf2
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import argparse
0fcdd689 21import concurrent.futures
f4f3d8ad 22import csv
a6fedd9f 23import http.client
6ffd06b5 24import ipaddress
dcef2ba4 25import json
78ff0cf2 26import logging
6ffd06b5
MT
27import math
28import re
22d8d199 29import socket
78ff0cf2 30import sys
3ce28dea 31import urllib.error
78ff0cf2
MT
32
33# Load our location module
34import location
29c6fa22 35import location.database
3192b66c 36import location.importer
78ff0cf2
MT
37from location.i18n import _
38
# Initialise the shared importer logger; propagate=1 hands records up to
# the root logger so libloc's global logging configuration applies.
log = logging.getLogger("location.importer")
log.propagate = 1
# Define constants
#
# Publicly routable ASN ranges (excludes reserved/private/documentation
# ASNs); used by _check_parsed_asn() to filter bogus AS numbers.
VALID_ASN_RANGES = (
	(1, 23455),
	(23457, 64495),
	(131072, 4199999999),
)

# Configure the CSV parser for ARIN
csv.register_dialect("arin", delimiter=",", quoting=csv.QUOTE_ALL, quotechar="\"")
78ff0cf2
MT
53class CLI(object):
54 def parse_cli(self):
55 parser = argparse.ArgumentParser(
56 description=_("Location Importer Command Line Interface"),
57 )
6ffd06b5 58 subparsers = parser.add_subparsers()
78ff0cf2
MT
59
60 # Global configuration flags
61 parser.add_argument("--debug", action="store_true",
62 help=_("Enable debug output"))
bc1f5f53
MT
63 parser.add_argument("--quiet", action="store_true",
64 help=_("Enable quiet mode"))
78ff0cf2
MT
65
66 # version
67 parser.add_argument("--version", action="version",
68 version="%(prog)s @VERSION@")
69
29c6fa22
MT
70 # Database
71 parser.add_argument("--database-host", required=True,
72 help=_("Database Hostname"), metavar=_("HOST"))
73 parser.add_argument("--database-name", required=True,
74 help=_("Database Name"), metavar=_("NAME"))
75 parser.add_argument("--database-username", required=True,
76 help=_("Database Username"), metavar=_("USERNAME"))
77 parser.add_argument("--database-password", required=True,
78 help=_("Database Password"), metavar=_("PASSWORD"))
79
0983f3dd
MT
80 # Write Database
81 write = subparsers.add_parser("write", help=_("Write database to file"))
82 write.set_defaults(func=self.handle_write)
83 write.add_argument("file", nargs=1, help=_("Database File"))
84 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
1164d876 85 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
0983f3dd
MT
86 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
87 write.add_argument("--description", nargs="?", help=_("Sets a description"))
88 write.add_argument("--license", nargs="?", help=_("Sets the license"))
b904896a 89 write.add_argument("--version", type=int, help=_("Database Format Version"))
0983f3dd 90
6ffd06b5
MT
91 # Update WHOIS
92 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
93 update_whois.set_defaults(func=self.handle_update_whois)
94
83d61c46
MT
95 # Update announcements
96 update_announcements = subparsers.add_parser("update-announcements",
97 help=_("Update BGP Annoucements"))
98 update_announcements.set_defaults(func=self.handle_update_announcements)
99 update_announcements.add_argument("server", nargs=1,
100 help=_("Route Server to connect to"), metavar=_("SERVER"))
101
0fcdd689
MT
102 # Update geofeeds
103 update_geofeeds = subparsers.add_parser("update-geofeeds",
104 help=_("Update Geofeeds"))
105 update_geofeeds.set_defaults(func=self.handle_update_geofeeds)
106
d7fc3057
MT
107 # Update overrides
108 update_overrides = subparsers.add_parser("update-overrides",
109 help=_("Update overrides"),
110 )
111 update_overrides.add_argument(
112 "files", nargs="+", help=_("Files to import"),
113 )
114 update_overrides.set_defaults(func=self.handle_update_overrides)
115
8084b33a
MT
116 # Import countries
117 import_countries = subparsers.add_parser("import-countries",
118 help=_("Import countries"),
119 )
120 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
121 help=_("File to import"))
122 import_countries.set_defaults(func=self.handle_import_countries)
123
78ff0cf2
MT
124 args = parser.parse_args()
125
bc1f5f53 126 # Configure logging
78ff0cf2 127 if args.debug:
f9de5e61 128 location.logger.set_level(logging.DEBUG)
bc1f5f53
MT
129 elif args.quiet:
130 location.logger.set_level(logging.WARNING)
78ff0cf2 131
6ffd06b5
MT
132 # Print usage if no action was given
133 if not "func" in args:
134 parser.print_usage()
135 sys.exit(2)
136
78ff0cf2
MT
137 return args
138
139 def run(self):
140 # Parse command line arguments
141 args = self.parse_cli()
142
29c6fa22 143 # Initialise database
6ffd06b5 144 self.db = self._setup_database(args)
29c6fa22 145
78ff0cf2 146 # Call function
6ffd06b5 147 ret = args.func(args)
78ff0cf2
MT
148
149 # Return with exit code
150 if ret:
151 sys.exit(ret)
152
153 # Otherwise just exit
154 sys.exit(0)
155
29c6fa22
MT
156 def _setup_database(self, ns):
157 """
158 Initialise the database
159 """
160 # Connect to database
161 db = location.database.Connection(
162 host=ns.database_host, database=ns.database_name,
163 user=ns.database_username, password=ns.database_password,
164 )
165
166 with db.transaction():
167 db.execute("""
83d61c46
MT
168 -- announcements
169 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
170 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
171 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
172 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
173 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
a1707d89 174 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
83d61c46 175
6ffd06b5 176 -- autnums
0983f3dd 177 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
26f06e70 178 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
6ffd06b5
MT
179 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
180
8084b33a
MT
181 -- countries
182 CREATE TABLE IF NOT EXISTS countries(
183 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
184 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
185
429a43d1 186 -- networks
83d61c46 187 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
b6b2b331 188 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
26f06e70 189 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
429a43d1 190 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
002deb6b 191 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
83d61c46 192 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
d7fc3057 193
183b2f74 194 -- geofeeds
0fcdd689
MT
195 CREATE TABLE IF NOT EXISTS geofeeds(
196 id serial primary key,
197 url text,
198 status integer default null,
199 updated_at timestamp without time zone default null
200 );
ce308393 201 ALTER TABLE geofeeds ADD COLUMN IF NOT EXISTS error text;
0fcdd689
MT
202 CREATE UNIQUE INDEX IF NOT EXISTS geofeeds_unique
203 ON geofeeds(url);
204 CREATE TABLE IF NOT EXISTS geofeed_networks(
205 geofeed_id integer references geofeeds(id) on delete cascade,
206 network inet,
207 country text,
208 region text,
209 city text
210 );
211 CREATE INDEX IF NOT EXISTS geofeed_networks_geofeed_id
212 ON geofeed_networks(geofeed_id);
213 CREATE INDEX IF NOT EXISTS geofeed_networks_search
47fc42fb 214 ON geofeed_networks USING GIST(network inet_ops);
183b2f74
MT
215 CREATE TABLE IF NOT EXISTS network_geofeeds(network inet, url text);
216 CREATE UNIQUE INDEX IF NOT EXISTS network_geofeeds_unique
217 ON network_geofeeds(network);
7f0a6add
MT
218 CREATE INDEX IF NOT EXISTS network_geofeeds_search
219 ON network_geofeeds USING GIST(network inet_ops);
220 CREATE INDEX IF NOT EXISTS network_geofeeds_url
221 ON network_geofeeds(url);
183b2f74 222
7808c8b7
MT
223 -- feeds
224 CREATE TABLE IF NOT EXISTS autnum_feeds(
225 number bigint NOT NULL,
226 source text NOT NULL,
227 name text,
228 country text,
229 is_anonymous_proxy boolean,
230 is_satellite_provider boolean,
231 is_anycast boolean,
232 is_drop boolean
233 );
234 CREATE UNIQUE INDEX IF NOT EXISTS autnum_feeds_unique
235 ON autnum_feeds(number, source);
236
c923c1cc
MT
237 CREATE TABLE IF NOT EXISTS network_feeds(
238 network inet NOT NULL,
239 source text NOT NULL,
240 country text,
241 is_anonymous_proxy boolean,
242 is_satellite_provider boolean,
243 is_anycast boolean,
244 is_drop boolean
245 );
246 CREATE UNIQUE INDEX IF NOT EXISTS network_feeds_unique
247 ON network_feeds(network, source);
248 CREATE INDEX IF NOT EXISTS network_feeds_search
249 ON network_feeds USING GIST(network inet_ops);
250
d7fc3057
MT
251 -- overrides
252 CREATE TABLE IF NOT EXISTS autnum_overrides(
253 number bigint NOT NULL,
254 name text,
bd1aa6a1 255 country text,
b8e25b71
MT
256 is_anonymous_proxy boolean,
257 is_satellite_provider boolean,
258 is_anycast boolean
d7fc3057
MT
259 );
260 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
261 ON autnum_overrides(number);
39ee3120 262 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
e17e804e 263 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
d7fc3057
MT
264
265 CREATE TABLE IF NOT EXISTS network_overrides(
266 network inet NOT NULL,
267 country text,
b8e25b71
MT
268 is_anonymous_proxy boolean,
269 is_satellite_provider boolean,
270 is_anycast boolean
d7fc3057
MT
271 );
272 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
273 ON network_overrides(network);
991baf53
MT
274 CREATE INDEX IF NOT EXISTS network_overrides_search
275 ON network_overrides USING GIST(network inet_ops);
39ee3120 276 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
e17e804e 277 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
29c6fa22
MT
278 """)
279
280 return db
281
0983f3dd
MT
282 def handle_write(self, ns):
283 """
284 Compiles a database in libloc format out of what is in the database
285 """
0983f3dd 286 # Allocate a writer
1164d876 287 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
0983f3dd
MT
288
289 # Set all metadata
290 if ns.vendor:
291 writer.vendor = ns.vendor
292
293 if ns.description:
294 writer.description = ns.description
295
296 if ns.license:
297 writer.license = ns.license
298
299 # Add all Autonomous Systems
300 log.info("Writing Autonomous Systems...")
301
302 # Select all ASes with a name
6e97c44b
MT
303 rows = self.db.query("""
304 SELECT
305 autnums.number AS number,
306 COALESCE(
ef1581fb 307 overrides.name,
6e97c44b
MT
308 autnums.name
309 ) AS name
ef1581fb
MT
310 FROM
311 autnums
312 LEFT JOIN
313 autnum_overrides overrides ON autnums.number = overrides.number
314 ORDER BY
315 autnums.number
316 """)
0983f3dd
MT
317
318 for row in rows:
008d5468
MT
319 # Skip AS without names
320 if not row.name:
321 continue
322
0983f3dd
MT
323 a = writer.add_as(row.number)
324 a.name = row.name
325
326 # Add all networks
327 log.info("Writing networks...")
328
329 # Select all known networks
330 rows = self.db.query("""
295a7774
MT
331 WITH known_networks AS (
332 SELECT network FROM announcements
333 UNION
334 SELECT network FROM networks
335 UNION
c923c1cc
MT
336 SEELCT network FROM network_feeds
337 UNION
295a7774 338 SELECT network FROM network_overrides
e87dbd44
MT
339 UNION
340 SELECT network FROM geofeed_networks
295a7774
MT
341 ),
342
343 ordered_networks AS (
344 SELECT
345 known_networks.network AS network,
346 announcements.autnum AS autnum,
347 networks.country AS country,
348
349 -- Must be part of returned values for ORDER BY clause
350 masklen(announcements.network) AS sort_a,
351 masklen(networks.network) AS sort_b
352 FROM
353 known_networks
354 LEFT JOIN
355 announcements ON known_networks.network <<= announcements.network
356 LEFT JOIN
357 networks ON known_networks.network <<= networks.network
358 ORDER BY
359 known_networks.network,
360 sort_a DESC,
361 sort_b DESC
362 )
363
5372d9c7
MT
364 -- Return a list of those networks enriched with all
365 -- other information that we store in the database
0983f3dd 366 SELECT
bbea93a7
MT
367 DISTINCT ON (network)
368 network,
369 autnum,
bd1aa6a1
MT
370
371 -- Country
372 COALESCE(
373 (
374 SELECT country FROM network_overrides overrides
bbea93a7 375 WHERE networks.network <<= overrides.network
bd1aa6a1
MT
376 ORDER BY masklen(overrides.network) DESC
377 LIMIT 1
378 ),
379 (
380 SELECT country FROM autnum_overrides overrides
bbea93a7 381 WHERE networks.autnum = overrides.number
bd1aa6a1 382 ),
c923c1cc
MT
383 (
384 SELECT country FROM network_feeds feeds
385 WHERE networks.network <<= feeds.network
386 ORDER BY masklen(feeds.network) DESC
387 LIMIT 1
388 ),
7808c8b7
MT
389 (
390 SELECT country FROM autnum_feeds feeds
391 WHERE networks.autnum = feeds.number
392 ORDER BY source LIMIT 1
393 ),
e87dbd44
MT
394 (
395 SELECT
396 geofeed_networks.country AS country
397 FROM
398 network_geofeeds
399
400 -- Join the data from the geofeeds
401 LEFT JOIN
402 geofeeds ON network_geofeeds.url = geofeeds.url
403 LEFT JOIN
404 geofeed_networks ON geofeeds.id = geofeed_networks.geofeed_id
405
406 -- Check whether we have a geofeed for this network
407 WHERE
408 networks.network <<= network_geofeeds.network
409 AND
410 networks.network <<= geofeed_networks.network
411
412 -- Filter for the best result
413 ORDER BY
414 masklen(geofeed_networks.network) DESC
415 LIMIT 1
416 ),
bd1aa6a1
MT
417 networks.country
418 ) AS country,
8e8555bb 419
0983f3dd 420 -- Flags
1422b5d4
MT
421 COALESCE(
422 (
423 SELECT is_anonymous_proxy FROM network_overrides overrides
bbea93a7 424 WHERE networks.network <<= overrides.network
1422b5d4
MT
425 ORDER BY masklen(overrides.network) DESC
426 LIMIT 1
427 ),
c923c1cc
MT
428 (
429 SELECT is_anonymous_proxy FROM network_feeds feeds
430 WHERE networks.network <<= feeds.network
431 ORDER BY masklen(feeds.network) DESC
432 LIMIT 1
433 ),
7808c8b7
MT
434 (
435 SELECT is_anonymous_proxy FROM autnum_feeds feeds
436 WHERE networks.autnum = feeds.number
437 ORDER BY source LIMIT 1
438 ),
1422b5d4
MT
439 (
440 SELECT is_anonymous_proxy FROM autnum_overrides overrides
bbea93a7 441 WHERE networks.autnum = overrides.number
b8e25b71
MT
442 ),
443 FALSE
1422b5d4
MT
444 ) AS is_anonymous_proxy,
445 COALESCE(
446 (
447 SELECT is_satellite_provider FROM network_overrides overrides
bbea93a7 448 WHERE networks.network <<= overrides.network
1422b5d4
MT
449 ORDER BY masklen(overrides.network) DESC
450 LIMIT 1
451 ),
c923c1cc
MT
452 (
453 SELECT is_satellite_provider FROM network_feeds feeds
454 WHERE networks.network <<= feeds.network
455 ORDER BY masklen(feeds.network) DESC
456 LIMIT 1
457 ),
7808c8b7
MT
458 (
459 SELECT is_satellite_provider FROM autnum_feeds feeds
460 WHERE networks.autnum = feeds.number
461 ORDER BY source LIMIT 1
462 ),
1422b5d4
MT
463 (
464 SELECT is_satellite_provider FROM autnum_overrides overrides
bbea93a7 465 WHERE networks.autnum = overrides.number
b8e25b71
MT
466 ),
467 FALSE
1422b5d4
MT
468 ) AS is_satellite_provider,
469 COALESCE(
470 (
471 SELECT is_anycast FROM network_overrides overrides
bbea93a7 472 WHERE networks.network <<= overrides.network
1422b5d4
MT
473 ORDER BY masklen(overrides.network) DESC
474 LIMIT 1
475 ),
c923c1cc
MT
476 (
477 SELECT is_anycast FROM network_feeds feeds
478 WHERE networks.network <<= feeds.network
479 ORDER BY masklen(feeds.network) DESC
480 LIMIT 1
481 ),
7808c8b7
MT
482 (
483 SELECT is_anycast FROM autnum_feeds feeds
484 WHERE networks.autnum = feeds.number
485 ORDER BY source LIMIT 1
486 ),
1422b5d4
MT
487 (
488 SELECT is_anycast FROM autnum_overrides overrides
bbea93a7 489 WHERE networks.autnum = overrides.number
b8e25b71
MT
490 ),
491 FALSE
e17e804e
PM
492 ) AS is_anycast,
493 COALESCE(
494 (
495 SELECT is_drop FROM network_overrides overrides
496 WHERE networks.network <<= overrides.network
497 ORDER BY masklen(overrides.network) DESC
498 LIMIT 1
499 ),
c923c1cc
MT
500 (
501 SELECT is_drop FROM network_feeds feeds
502 WHERE networks.network <<= feeds.network
503 ORDER BY masklen(feeds.network) DESC
504 LIMIT 1
505 ),
7808c8b7
MT
506 (
507 SELECT is_drop FROM autnum_feeds feeds
508 WHERE networks.autnum = feeds.number
509 ORDER BY source LIMIT 1
510 ),
e17e804e
PM
511 (
512 SELECT is_drop FROM autnum_overrides overrides
513 WHERE networks.autnum = overrides.number
514 ),
515 FALSE
516 ) AS is_drop
295a7774
MT
517 FROM
518 ordered_networks networks
0983f3dd
MT
519 """)
520
521 for row in rows:
522 network = writer.add_network(row.network)
523
5372d9c7
MT
524 # Save country
525 if row.country:
526 network.country_code = row.country
527
528 # Save ASN
529 if row.autnum:
530 network.asn = row.autnum
0983f3dd
MT
531
532 # Set flags
533 if row.is_anonymous_proxy:
534 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
535
536 if row.is_satellite_provider:
537 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
538
539 if row.is_anycast:
540 network.set_flag(location.NETWORK_FLAG_ANYCAST)
541
e17e804e
PM
542 if row.is_drop:
543 network.set_flag(location.NETWORK_FLAG_DROP)
544
8084b33a
MT
545 # Add all countries
546 log.info("Writing countries...")
547 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
548
549 for row in rows:
550 c = writer.add_country(row.country_code)
551 c.continent_code = row.continent_code
552 c.name = row.name
553
0983f3dd
MT
554 # Write everything to file
555 log.info("Writing database to file...")
556 for file in ns.file:
557 writer.write(file)
558
6ffd06b5
MT
559 def handle_update_whois(self, ns):
560 downloader = location.importer.Downloader()
561
3ce28dea
MT
562 # Did we run successfully?
563 error = False
84b175e2 564
fa9220b0 565 # Fetch all valid country codes to check parsed networks against
3ce28dea 566 validcountries = self.countries
6ffd06b5 567
3ce28dea 568 # Iterate over all potential sources
f3edaa9c 569 for source in sorted(location.importer.SOURCES):
3ce28dea
MT
570 with self.db.transaction():
571 # Create some temporary tables to store parsed data
572 self.db.execute("""
573 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
574 organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
575 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
576
577 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
578 name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
579 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
580
581 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
582 original_countries text[] NOT NULL, source text NOT NULL)
583 ON COMMIT DROP;
584 CREATE INDEX _rirdata_search ON _rirdata
585 USING BTREE(family(network), masklen(network));
586 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
587 """)
002deb6b 588
3ce28dea 589 # Remove all previously imported content
54b3f745 590 self.db.execute("DELETE FROM autnums WHERE source = %s", source)
3ce28dea 591 self.db.execute("DELETE FROM networks WHERE source = %s", source)
002deb6b 592
3ce28dea
MT
593 try:
594 # Fetch WHOIS sources
595 for url in location.importer.WHOIS_SOURCES.get(source, []):
596 for block in downloader.request_blocks(url):
597 self._parse_block(block, source, validcountries)
598
599 # Fetch extended sources
600 for url in location.importer.EXTENDED_SOURCES.get(source, []):
601 for line in downloader.request_lines(url):
602 self._parse_line(line, source, validcountries)
603 except urllib.error.URLError as e:
e76b8204 604 log.error("Could not retrieve data from %s: %s" % (source, e))
3ce28dea
MT
605 error = True
606
607 # Continue with the next source
608 continue
002deb6b 609
3ce28dea
MT
610 # Process all parsed networks from every RIR we happen to have access to,
611 # insert the largest network chunks into the networks table immediately...
fa9220b0
MT
612 families = self.db.query("""
613 SELECT DISTINCT
614 family(network) AS family
615 FROM
616 _rirdata
617 ORDER BY
618 family(network)
619 """,
620 )
3ce28dea
MT
621
622 for family in (row.family for row in families):
5bfa1bb4
MT
623 # Fetch the smallest mask length in our data set
624 smallest = self.db.get("""
625 SELECT
626 MIN(
627 masklen(network)
628 ) AS prefix
629 FROM
630 _rirdata
631 WHERE
fa9220b0
MT
632 family(network) = %s
633 """, family,
5bfa1bb4
MT
634 )
635
636 # Copy all networks
637 self.db.execute("""
638 INSERT INTO
639 networks
640 (
641 network,
642 country,
643 original_countries,
644 source
645 )
646 SELECT
647 network,
648 country,
649 original_countries,
650 source
651 FROM
652 _rirdata
653 WHERE
654 masklen(network) = %s
655 AND
656 family(network) = %s
657 ON CONFLICT DO
658 NOTHING""",
659 smallest.prefix,
660 family,
661 )
3ce28dea
MT
662
663 # ... determine any other prefixes for this network family, ...
5bfa1bb4
MT
664 prefixes = self.db.query("""
665 SELECT
666 DISTINCT masklen(network) AS prefix
667 FROM
668 _rirdata
669 WHERE
670 family(network) = %s
671 ORDER BY
672 masklen(network) ASC
fa9220b0
MT
673 OFFSET 1
674 """, family,
5bfa1bb4 675 )
3ce28dea
MT
676
677 # ... and insert networks with this prefix in case they provide additional
678 # information (i. e. subnet of a larger chunk with a different country)
679 for prefix in (row.prefix for row in prefixes):
680 self.db.execute("""
681 WITH candidates AS (
682 SELECT
683 _rirdata.network,
684 _rirdata.country,
685 _rirdata.original_countries,
686 _rirdata.source
687 FROM
688 _rirdata
689 WHERE
690 family(_rirdata.network) = %s
691 AND
692 masklen(_rirdata.network) = %s
693 ),
694 filtered AS (
695 SELECT
696 DISTINCT ON (c.network)
697 c.network,
698 c.country,
699 c.original_countries,
700 c.source,
701 masklen(networks.network),
702 networks.country AS parent_country
703 FROM
704 candidates c
705 LEFT JOIN
706 networks
707 ON
708 c.network << networks.network
709 ORDER BY
710 c.network,
711 masklen(networks.network) DESC NULLS LAST
712 )
713 INSERT INTO
714 networks(network, country, original_countries, source)
002deb6b 715 SELECT
3ce28dea
MT
716 network,
717 country,
718 original_countries,
719 source
002deb6b 720 FROM
3ce28dea 721 filtered
002deb6b 722 WHERE
3ce28dea
MT
723 parent_country IS NULL
724 OR
725 country <> parent_country
fa9220b0
MT
726 ON CONFLICT DO NOTHING
727 """, family, prefix,
002deb6b 728 )
0365119d 729
3ce28dea 730 self.db.execute("""
fa9220b0
MT
731 INSERT INTO
732 autnums
733 (
734 number,
735 name,
736 source
737 )
738 SELECT
739 _autnums.number,
740 _organizations.name,
741 _organizations.source
742 FROM
743 _autnums
744 JOIN
745 _organizations ON _autnums.organization = _organizations.handle
746 ON CONFLICT
747 (
748 number
749 )
750 DO UPDATE
751 SET name = excluded.name
752 """,
753 )
429a43d1 754
3ce28dea
MT
755 # Download and import (technical) AS names from ARIN
756 with self.db.transaction():
41f5a725 757 self._import_as_names_from_arin(downloader)
92403f39 758
3ce28dea
MT
759 # Return a non-zero exit code for errors
760 return 1 if error else 0
761
bd341642
PM
762 def _check_parsed_network(self, network):
763 """
764 Assistive function to detect and subsequently sort out parsed
765 networks from RIR data (both Whois and so-called "extended sources"),
766 which are or have...
767
768 (a) not globally routable (RFC 1918 space, et al.)
769 (b) covering a too large chunk of the IP address space (prefix length
770 is < 7 for IPv4 networks, and < 10 for IPv6)
771 (c) "0.0.0.0" or "::" as a network address
772 (d) are too small for being publicly announced (we have decided not to
773 process them at the moment, as they significantly enlarge our
774 database without providing very helpful additional information)
775
776 This unfortunately is necessary due to brain-dead clutter across
777 various RIR databases, causing mismatches and eventually disruptions.
778
779 We will return False in case a network is not suitable for adding
780 it to our database, and True otherwise.
781 """
782
783 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
784 return False
785
786 if not network.is_global:
2ba6ed07 787 log.debug("Skipping non-globally routable network: %s" % network)
bd341642
PM
788 return False
789
790 if network.version == 4:
791 if network.prefixlen < 7:
2ba6ed07 792 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
793 return False
794
795 if network.prefixlen > 24:
ebb087cf 796 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
797 return False
798
799 if str(network.network_address) == "0.0.0.0":
2ba6ed07 800 log.debug("Skipping network based on 0.0.0.0: %s" % network)
bd341642
PM
801 return False
802
803 elif network.version == 6:
804 if network.prefixlen < 10:
2ba6ed07 805 log.debug("Skipping too big IP chunk: %s" % network)
bd341642
PM
806 return False
807
808 if network.prefixlen > 48:
ebb087cf 809 log.debug("Skipping network too small to be publicly announced: %s" % network)
bd341642
PM
810 return False
811
812 if str(network.network_address) == "::":
2ba6ed07 813 log.debug("Skipping network based on '::': %s" % network)
bd341642
PM
814 return False
815
816 else:
817 # This should not happen...
84187ab5 818 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
bd341642
PM
819 return False
820
821 # In case we have made it here, the network is considered to
822 # be suitable for libloc consumption...
823 return True
824
43fe570c
PM
825 def _check_parsed_asn(self, asn):
826 """
827 Assistive function to filter Autonomous System Numbers not being suitable
828 for adding to our database. Returns False in such cases, and True otherwise.
829 """
830
831 for start, end in VALID_ASN_RANGES:
832 if start <= asn and end >= asn:
833 return True
834
835 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
836 return False
837
28b08385 838 def _parse_block(self, block, source_key, validcountries = None):
6ffd06b5
MT
839 # Get first line to find out what type of block this is
840 line = block[0]
841
6ffd06b5 842 # aut-num
429a43d1 843 if line.startswith("aut-num:"):
28b08385 844 return self._parse_autnum_block(block, source_key)
6ffd06b5 845
aadac4c5
PM
846 # inetnum
847 if line.startswith("inet6num:") or line.startswith("inetnum:"):
28b08385 848 return self._parse_inetnum_block(block, source_key, validcountries)
aadac4c5 849
6ffd06b5
MT
850 # organisation
851 elif line.startswith("organisation:"):
28b08385 852 return self._parse_org_block(block, source_key)
6ffd06b5 853
28b08385 854 def _parse_autnum_block(self, block, source_key):
6ffd06b5
MT
855 autnum = {}
856 for line in block:
857 # Split line
858 key, val = split_line(line)
859
860 if key == "aut-num":
861 m = re.match(r"^(AS|as)(\d+)", val)
862 if m:
863 autnum["asn"] = m.group(2)
864
0365119d 865 elif key == "org":
e7d612e5 866 autnum[key] = val.upper()
6ffd06b5 867
426e0bee
PM
868 elif key == "descr":
869 # Save the first description line as well...
870 if not key in autnum:
871 autnum[key] = val
872
6ffd06b5 873 # Skip empty objects
426e0bee 874 if not autnum or not "asn" in autnum:
6ffd06b5
MT
875 return
876
426e0bee
PM
877 # Insert a dummy organisation handle into our temporary organisations
878 # table in case the AS does not have an organisation handle set, but
879 # has a description (a quirk often observed in APNIC area), so we can
880 # later display at least some string for this AS.
881 if not "org" in autnum:
882 if "descr" in autnum:
883 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
884
885 self.db.execute("INSERT INTO _organizations(handle, name, source) \
886 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
887 autnum.get("org"), autnum.get("descr"), source_key,
888 )
889 else:
890 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
891 autnum.get("asn"))
892 return
893
6ffd06b5 894 # Insert into database
28b08385
PM
895 self.db.execute("INSERT INTO _autnums(number, organization, source) \
896 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
0365119d 897 organization = excluded.organization",
28b08385 898 autnum.get("asn"), autnum.get("org"), source_key,
6ffd06b5
MT
899 )
900
28b08385 901 def _parse_inetnum_block(self, block, source_key, validcountries = None):
84187ab5 902 log.debug("Parsing inetnum block:")
aadac4c5
PM
903
904 inetnum = {}
905 for line in block:
84187ab5 906 log.debug(line)
aadac4c5
PM
907
908 # Split line
909 key, val = split_line(line)
910
84187ab5
PM
911 # Filter any inetnum records which are only referring to IP space
912 # not managed by that specific RIR...
913 if key == "netname":
5254e5fc 914 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
2ba6ed07 915 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
84187ab5
PM
916 return
917
aadac4c5
PM
918 if key == "inetnum":
919 start_address, delim, end_address = val.partition("-")
920
921 # Strip any excess space
922 start_address, end_address = start_address.rstrip(), end_address.strip()
923
845da577
PM
924 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
925 if start_address and not (delim or end_address):
926 try:
927 start_address = ipaddress.ip_network(start_address, strict=False)
928 except ValueError:
929 start_address = start_address.split("/")
38d0faea 930 ldigits = start_address[0].count(".")
845da577
PM
931
932 # How many octets do we need to add?
933 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
2ad36187 934 if ldigits == 1:
845da577 935 start_address = start_address[0] + ".0.0/" + start_address[1]
2ad36187 936 elif ldigits == 2:
845da577
PM
937 start_address = start_address[0] + ".0/" + start_address[1]
938 else:
939 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
940 return
941
942 try:
943 start_address = ipaddress.ip_network(start_address, strict=False)
944 except ValueError:
945 log.warning("Could not parse line in LACNIC DB format: %s" % line)
946 return
947
948 # Enumerate first and last IP address of this network
949 end_address = start_address[-1]
950 start_address = start_address[0]
951
952 else:
953 # Convert to IP address
954 try:
955 start_address = ipaddress.ip_address(start_address)
956 end_address = ipaddress.ip_address(end_address)
957 except ValueError:
958 log.warning("Could not parse line: %s" % line)
959 return
aadac4c5 960
1814283b 961 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
aadac4c5
PM
962
963 elif key == "inet6num":
1814283b 964 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
aadac4c5
PM
965
966 elif key == "country":
7434e5e0
PM
967 val = val.upper()
968
b6b2b331 969 # Catch RIR data objects with more than one country code...
7434e5e0 970 if not key in inetnum:
b6b2b331
PM
971 inetnum[key] = []
972 else:
7434e5e0 973 if val in inetnum.get("country"):
b6b2b331
PM
974 # ... but keep this list distinct...
975 continue
976
01e5f0ff
PM
977 # When people set country codes to "UK", they actually mean "GB"
978 if val == "UK":
979 val = "GB"
980
7434e5e0 981 inetnum[key].append(val)
aadac4c5 982
183b2f74
MT
983 # Parse the geofeed attribute
984 elif key == "geofeed":
985 inetnum["geofeed"] = val
986
987 # Parse geofeed when used as a remark
45c0536c
MT
988 elif key == "remarks":
989 m = re.match(r"^(?:Geofeed)\s+(https://.*)", val)
183b2f74
MT
990 if m:
991 inetnum["geofeed"] = m.group(1)
992
aadac4c5 993 # Skip empty objects
002deb6b 994 if not inetnum or not "country" in inetnum:
aadac4c5
PM
995 return
996
b6b2b331
PM
997 # Prepare skipping objects with unknown country codes...
998 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
999
1814283b
PM
1000 # Iterate through all networks enumerated from above, check them for plausibility and insert
1001 # them into the database, if _check_parsed_network() succeeded
1002 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
1003 if self._check_parsed_network(single_network):
b6b2b331
PM
1004 # Skip objects with unknown country codes if they are valid to avoid log spam...
1005 if validcountries and invalidcountries:
1006 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
1007 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
48770ca8 1008 break
7138b4ac
PM
1009
1010 # Everything is fine here, run INSERT statement...
b6b2b331
PM
1011 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
1012 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
1013 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
1814283b 1014 )
aadac4c5 1015
183b2f74
MT
1016 # Update any geofeed information
1017 geofeed = inetnum.get("geofeed", None)
183b2f74 1018 if geofeed:
da832d44 1019 self._parse_geofeed(geofeed, single_network)
183b2f74
MT
1020
1021 # Delete any previous geofeeds
1022 else:
1023 self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
1024 "%s" % single_network)
1025
da832d44
MT
1026 def _parse_geofeed(self, url, single_network):
1027 # Parse the URL
1028 url = urllib.parse.urlparse(url)
1029
1030 # Make sure that this is a HTTPS URL
1031 if not url.scheme == "https":
1032 log.debug("Geofeed URL is not using HTTPS: %s" % geofeed)
1033 return
1034
1035 # Put the URL back together normalized
1036 url = url.geturl()
1037
1038 # Store/update any geofeeds
1039 self.db.execute("""
1040 INSERT INTO
1041 network_geofeeds(
1042 network,
1043 url
1044 )
1045 VALUES(
1046 %s, %s
1047 )
1048 ON CONFLICT (network) DO
1049 UPDATE SET url = excluded.url""",
1050 "%s" % single_network, url,
1051 )
1052
28b08385 1053 def _parse_org_block(self, block, source_key):
6ffd06b5
MT
1054 org = {}
1055 for line in block:
1056 # Split line
1057 key, val = split_line(line)
1058
e7d612e5
PM
1059 if key == "organisation":
1060 org[key] = val.upper()
1061 elif key == "org-name":
6ffd06b5
MT
1062 org[key] = val
1063
1064 # Skip empty objects
1065 if not org:
1066 return
1067
28b08385
PM
1068 self.db.execute("INSERT INTO _organizations(handle, name, source) \
1069 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
0365119d 1070 UPDATE SET name = excluded.name",
28b08385 1071 org.get("organisation"), org.get("org-name"), source_key,
6ffd06b5
MT
1072 )
1073
28b08385 1074 def _parse_line(self, line, source_key, validcountries = None):
429a43d1
MT
1075 # Skip version line
1076 if line.startswith("2"):
1077 return
6ffd06b5 1078
429a43d1
MT
1079 # Skip comments
1080 if line.startswith("#"):
1081 return
6ffd06b5 1082
429a43d1
MT
1083 try:
1084 registry, country_code, type, line = line.split("|", 3)
1085 except:
1086 log.warning("Could not parse line: %s" % line)
1087 return
6ffd06b5 1088
84b175e2
PM
1089 # Skip any lines that are for stats only or do not have a country
1090 # code at all (avoids log spam below)
1091 if not country_code or country_code == '*':
1092 return
1093
1094 # Skip objects with unknown country codes
1095 if validcountries and country_code not in validcountries:
1096 log.warning("Skipping line with bogus country '%s': %s" % \
1097 (country_code, line))
6ffd06b5
MT
1098 return
1099
429a43d1 1100 if type in ("ipv6", "ipv4"):
28b08385 1101 return self._parse_ip_line(country_code, type, line, source_key)
429a43d1 1102
28b08385 1103 def _parse_ip_line(self, country, type, line, source_key):
429a43d1
MT
1104 try:
1105 address, prefix, date, status, organization = line.split("|")
1106 except ValueError:
1107 organization = None
1108
1109 # Try parsing the line without organization
1110 try:
1111 address, prefix, date, status = line.split("|")
1112 except ValueError:
1113 log.warning("Unhandled line format: %s" % line)
1114 return
1115
1116 # Skip anything that isn't properly assigned
1117 if not status in ("assigned", "allocated"):
1118 return
1119
1120 # Cast prefix into an integer
1121 try:
1122 prefix = int(prefix)
1123 except:
1124 log.warning("Invalid prefix: %s" % prefix)
7177031f 1125 return
429a43d1
MT
1126
1127 # Fix prefix length for IPv4
1128 if type == "ipv4":
1129 prefix = 32 - int(math.log(prefix, 2))
1130
1131 # Try to parse the address
1132 try:
1133 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
1134 except ValueError:
1135 log.warning("Invalid IP address: %s" % address)
1136 return
1137
bd341642
PM
1138 if not self._check_parsed_network(network):
1139 return
1140
b6b2b331
PM
1141 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
1142 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
87b3e102 1143 UPDATE SET country = excluded.country",
b6b2b331 1144 "%s" % network, country, [country], source_key,
6ffd06b5
MT
1145 )
1146
18a72eac
MT
1147 def _import_as_names_from_arin(self, downloader):
1148 # Delete all previously imported content
1149 self.db.execute("DELETE FROM autnums WHERE source = %s", "ARIN")
92403f39 1150
18a72eac
MT
1151 # Try to retrieve the feed from ftp.arin.net
1152 feed = downloader.request_lines("https://ftp.arin.net/pub/resource_registry_service/asns.csv")
f4f3d8ad 1153
18a72eac
MT
1154 # Walk through the file
1155 for line in csv.DictReader(feed, dialect="arin"):
1156 log.debug("Processing object: %s" % line)
92403f39 1157
18a72eac
MT
1158 # Fetch status
1159 status = line.get("Status")
92403f39 1160
18a72eac
MT
1161 # We are only interested in anything managed by ARIN
1162 if not status == "Full Registry Services":
85e44d9a 1163 continue
92403f39 1164
18a72eac
MT
1165 # Fetch organization name
1166 name = line.get("Org Name")
92403f39 1167
18a72eac
MT
1168 # Extract ASNs
1169 first_asn = line.get("Start AS Number")
1170 last_asn = line.get("End AS Number")
f4f3d8ad 1171
18a72eac
MT
1172 # Cast to a number
1173 try:
1174 first_asn = int(first_asn)
1175 except TypeError as e:
1176 log.warning("Could not parse ASN '%s'" % first_asn)
85e44d9a 1177 continue
92403f39 1178
18a72eac
MT
1179 try:
1180 last_asn = int(last_asn)
1181 except TypeError as e:
1182 log.warning("Could not parse ASN '%s'" % last_asn)
f4f3d8ad 1183 continue
92403f39 1184
18a72eac
MT
1185 # Check if the range is valid
1186 if last_asn < first_asn:
1187 log.warning("Invalid ASN range %s-%s" % (first_asn, last_asn))
1188
1189 # Insert everything into the database
1190 for asn in range(first_asn, last_asn + 1):
1191 if not self._check_parsed_asn(asn):
1192 log.warning("Skipping invalid ASN %s" % asn)
1193 continue
1194
1195 self.db.execute("""
1196 INSERT INTO
1197 autnums
1198 (
1199 number,
1200 name,
1201 source
1202 )
1203 VALUES
1204 (
1205 %s, %s, %s
1206 )
1207 ON CONFLICT
1208 (
1209 number
1210 )
1211 DO NOTHING
1212 """, asn, name, "ARIN",
1213 )
92403f39 1214
83d61c46
MT
	def handle_update_announcements(self, ns):
		"""
			Updates the announcements table from a BGP speaker.

			ns.server holds the address of the source; a value starting
			with "/" is treated as the path to a local Bird control
			socket. After the import, bogus/unroutable prefixes,
			reserved ASNs and records unseen for 14 days are purged.
		"""
		server = ns.server[0]

		with self.db.transaction():
			if server.startswith("/"):
				self._handle_update_announcements_from_bird(server)

			# Purge anything we never want here
			self.db.execute("""
				-- Delete default routes
				DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

				-- Delete anything that is not global unicast address space
				DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

				-- DELETE "current network" address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

				-- DELETE local loopback address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

				-- DELETE RFC 1918 address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

				-- DELETE test, benchmark and documentation address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

				-- DELETE CGNAT address space (RFC 6598)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

				-- DELETE link local address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

				-- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
				DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

				-- DELETE multicast and reserved address space
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
				DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

				-- Delete networks that are too small to be in the global routing table
				DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
				DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

				-- Delete any non-public or reserved ASNs
				-- (ranges mirror the VALID_ASN_RANGES constant at the top of this file)
				DELETE FROM announcements WHERE NOT (
					(autnum >= 1 AND autnum <= 23455)
				OR
					(autnum >= 23457 AND autnum <= 64495)
				OR
					(autnum >= 131072 AND autnum <= 4199999999)
				);

				-- Delete everything that we have not seen for 14 days
				DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
			""")
1278
1279 def _handle_update_announcements_from_bird(self, server):
1280 # Pre-compile the regular expression for faster searching
35e5f041 1281 route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
22d8d199
MT
1282
1283 log.info("Requesting routing table from Bird (%s)" % server)
1284
35e5f041
MT
1285 aggregated_networks = []
1286
22d8d199
MT
1287 # Send command to list all routes
1288 for line in self._bird_cmd(server, "show route"):
1289 m = route.match(line)
1290 if not m:
b767db8d
MT
1291 # Skip empty lines
1292 if not line:
1293 pass
1294
45fa97cd 1295 # Ignore any header lines with the name of the routing table
b767db8d
MT
1296 elif line.startswith(b"Table"):
1297 pass
45fa97cd
MT
1298
1299 # Log anything else
b767db8d
MT
1300 else:
1301 log.debug("Could not parse line: %s" % line.decode())
1302
22d8d199
MT
1303 continue
1304
1305 # Fetch the extracted network and ASN
1306 network, autnum = m.groups()
1307
35e5f041
MT
1308 # Decode into strings
1309 if network:
1310 network = network.decode()
1311 if autnum:
1312 autnum = autnum.decode()
1313
1314 # Collect all aggregated networks
1315 if not autnum:
1316 log.debug("%s is an aggregated network" % network)
1317 aggregated_networks.append(network)
1318 continue
1319
22d8d199
MT
1320 # Insert it into the database
1321 self.db.execute("INSERT INTO announcements(network, autnum) \
1322 VALUES(%s, %s) ON CONFLICT (network) DO \
1323 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
35e5f041 1324 network, autnum,
22d8d199
MT
1325 )
1326
35e5f041
MT
1327 # Process any aggregated networks
1328 for network in aggregated_networks:
1329 log.debug("Processing aggregated network %s" % network)
1330
1331 # Run "show route all" for each network
1332 for line in self._bird_cmd(server, "show route %s all" % network):
1333 # Try finding the path
1334 m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1335 if m:
1336 # Select the last AS number in the path
1337 autnum = m.group(1).decode()
1338
1339 # Insert it into the database
1340 self.db.execute("INSERT INTO announcements(network, autnum) \
1341 VALUES(%s, %s) ON CONFLICT (network) DO \
1342 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1343 network, autnum,
1344 )
1345
1346 # We don't need to process any more
1347 break
1348
22d8d199
MT
1349 def _bird_cmd(self, socket_path, command):
1350 # Connect to the socket
1351 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1352 s.connect(socket_path)
cedee656 1353
22d8d199
MT
1354 # Allocate some buffer
1355 buffer = b""
83d61c46 1356
bbcefb38
MT
1357 log.debug("Sending Bird command: %s" % command)
1358
22d8d199
MT
1359 # Send the command
1360 s.send(b"%s\n" % command.encode())
209c04b6 1361
22d8d199
MT
1362 while True:
1363 # Fill up the buffer
1364 buffer += s.recv(4096)
209c04b6 1365
22d8d199
MT
1366 while True:
1367 # Search for the next newline
1368 pos = buffer.find(b"\n")
209c04b6 1369
22d8d199
MT
1370 # If we cannot find one, we go back and read more data
1371 if pos <= 0:
1372 break
209c04b6 1373
22d8d199
MT
1374 # Cut after the newline character
1375 pos += 1
b89cee80 1376
22d8d199
MT
1377 # Split the line we want and keep the rest in buffer
1378 line, buffer = buffer[:pos], buffer[pos:]
83d61c46 1379
bbcefb38
MT
1380 # Try parsing any status lines
1381 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1382 code, delim, line = int(line[:4]), line[4], line[5:]
1383
1384 log.debug("Received response code %s from bird" % code)
1385
1386 # End of output
1387 if code == 0:
1388 return
1389
1390 # Ignore hello line
1391 elif code == 1:
1392 continue
83d61c46 1393
22d8d199
MT
1394 # Otherwise return the line
1395 yield line
83d61c46 1396
	def handle_update_geofeeds(self, ns):
		"""
			Refreshes all Geofeed data.

			First synchronises the geofeeds table with network_geofeeds,
			then concurrently fetches any feeds that have not been
			updated within a week, and finally drops data from feeds
			that failed to update for two weeks.
		"""
		# Sync geofeeds
		with self.db.transaction():
			# Delete all geofeeds which are no longer linked
			self.db.execute("""
				DELETE FROM
					geofeeds
				WHERE
					NOT EXISTS (
						SELECT
							1
						FROM
							network_geofeeds
						WHERE
							geofeeds.url = network_geofeeds.url
					)""",
			)

			# Copy all geofeeds
			self.db.execute("""
				INSERT INTO
					geofeeds(
						url
					)
				SELECT
					url
				FROM
					network_geofeeds
				ON CONFLICT (url)
				DO NOTHING
				""",
			)

		# Fetch all Geofeeds that require an update
		geofeeds = self.db.query("""
			SELECT
				id,
				url
			FROM
				geofeeds
			WHERE
				updated_at IS NULL
			OR
				updated_at <= CURRENT_TIMESTAMP - INTERVAL '1 week'
			ORDER BY
				id
			""")

		# Fetch feeds concurrently; each worker opens its own transaction
		with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
			results = executor.map(self._fetch_geofeed, geofeeds)

			# Fetch all results to raise any exceptions
			for result in results:
				pass

		# Delete data from any feeds that did not update in the last two weeks
		with self.db.transaction():
			self.db.execute("""
				DELETE FROM
					geofeed_networks
				WHERE
					geofeed_networks.geofeed_id IN (
						SELECT
							geofeeds.id
						FROM
							geofeeds
						WHERE
							updated_at IS NULL
						OR
							updated_at <= CURRENT_TIMESTAMP - INTERVAL '2 weeks'
					)
				""")
1469
0fcdd689
MT
	def _fetch_geofeed(self, geofeed):
		"""
			Downloads one Geofeed (RFC 8805 style CSV) and replaces its
			networks in the geofeed_networks table.

			Runs inside its own database transaction. On HTTP or
			connection errors, the feed's status/error columns are
			updated instead of raising; on success, updated_at is set
			and status/error are cleared.
		"""
		log.debug("Fetching Geofeed %s" % geofeed.url)

		with self.db.transaction():
			# Open the URL
			try:
				req = urllib.request.Request(geofeed.url, headers={
					"User-Agent" : "location/%s" % location.__version__,

					# We expect some plain text file in CSV format
					"Accept" : "text/csv, text/plain",
				})

				# XXX set proxy

				# Send the request
				with urllib.request.urlopen(req, timeout=10) as f:
					# Remove any previous data
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

					# Line counter for log messages
					lineno = 0

					# Read the output line by line
					for line in f:
						lineno += 1

						try:
							line = line.decode()

						# Ignore any lines we cannot decode
						except UnicodeDecodeError:
							log.debug("Could not decode line %s in %s" \
								% (lineno, geofeed.url))
							continue

						# Strip any newline
						line = line.rstrip()

						# Skip empty lines
						if not line:
							continue

						# Try to parse the line
						# NOTE(review): str.split() with maxsplit never raises
						# ValueError, so this except branch is effectively dead
						try:
							fields = line.split(",", 5)
						except ValueError:
							log.debug("Could not parse line: %s" % line)
							continue

						# Check if we have enough fields
						if len(fields) < 4:
							log.debug("Not enough fields in line: %s" % line)
							continue

						# Fetch all fields
						network, country, region, city, = fields[:4]

						# Try to parse the network
						try:
							network = ipaddress.ip_network(network, strict=False)
						except ValueError:
							log.debug("Could not parse network: %s" % network)
							continue

						# Strip any excess whitespace from country codes
						country = country.strip()

						# Make the country code uppercase
						country = country.upper()

						# Check the country code
						if not country:
							log.debug("Empty country code in Geofeed %s line %s" \
								% (geofeed.url, lineno))
							continue

						elif not location.country_code_is_valid(country):
							log.debug("Invalid country code in Geofeed %s:%s: %s" \
								% (geofeed.url, lineno, country))
							continue

						# Write this into the database
						self.db.execute("""
							INSERT INTO
								geofeed_networks (
									geofeed_id,
									network,
									country,
									region,
									city
								)
							VALUES (%s, %s, %s, %s, %s)""",
							geofeed.id,
							"%s" % network,
							country,
							region,
							city,
						)

			# Catch any HTTP errors
			except urllib.request.HTTPError as e:
				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", e.code, "%s" % e, geofeed.id)

				# Remove any previous data when the feed has been deleted
				if e.code == 404:
					self.db.execute("DELETE FROM geofeed_networks \
						WHERE geofeed_id = %s", geofeed.id)

			# Catch any other errors and connection timeouts
			except (http.client.InvalidURL, urllib.request.URLError, TimeoutError) as e:
				log.debug("Could not fetch URL %s: %s" % (geofeed.url, e))

				self.db.execute("UPDATE geofeeds SET status = %s, error = %s \
					WHERE id = %s", 599, "%s" % e, geofeed.id)

			# Mark the geofeed as updated
			else:
				self.db.execute("""
					UPDATE
						geofeeds
					SET
						updated_at = CURRENT_TIMESTAMP,
						status = NULL,
						error = NULL
					WHERE
						id = %s""",
					geofeed.id,
				)
1600
d7fc3057
MT
	def handle_update_overrides(self, ns):
		"""
			Imports manual overrides from the given file(s) and refreshes
			the auto-generated feeds (AWS, Spamhaus DROP).

			Only entries with source = 'manual' are dropped before the
			import; feed-generated entries are replaced by their own
			update functions.
		"""
		with self.db.transaction():
			# Only drop manually created overrides, as we can be reasonably sure to have them,
			# and preserve the rest. If appropriate, it is deleted by correspondent functions.
			self.db.execute("""
				DELETE FROM autnum_overrides WHERE source = 'manual';
				DELETE FROM network_overrides WHERE source = 'manual';
			""")

			# Update overrides for various cloud providers big enough to publish their own IP
			# network allocation lists in a machine-readable format...
			self._update_overrides_for_aws()

			# Update overrides for Spamhaus DROP feeds...
			self._update_feed_for_spamhaus_drop()

			for file in ns.files:
				log.info("Reading %s..." % file)

				with open(file, "rb") as f:
					for type, block in location.importer.read_blocks(f):
						if type == "net":
							network = block.get("net")
							# Try to parse and normalise the network
							try:
								network = ipaddress.ip_network(network, strict=False)
							except ValueError as e:
								log.warning("Invalid IP network: %s: %s" % (network, e))
								continue

							# Prevent that we overwrite all networks
							if network.prefixlen == 0:
								log.warning("Skipping %s: You cannot overwrite default" % network)
								continue

							self.db.execute("""
								INSERT INTO network_overrides(
									network,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES (%s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT (network) DO NOTHING""",
								"%s" % network,
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						elif type == "aut-num":
							autnum = block.get("aut-num")

							# Check if AS number begins with "AS"
							if not autnum.startswith("AS"):
								log.warning("Invalid AS number: %s" % autnum)
								continue

							# Strip "AS"
							autnum = autnum[2:]

							self.db.execute("""
								INSERT INTO autnum_overrides(
									number,
									name,
									country,
									source,
									is_anonymous_proxy,
									is_satellite_provider,
									is_anycast,
									is_drop
								) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
								ON CONFLICT DO NOTHING""",
								autnum,
								block.get("name"),
								block.get("country"),
								"manual",
								self._parse_bool(block, "is-anonymous-proxy"),
								self._parse_bool(block, "is-satellite-provider"),
								self._parse_bool(block, "is-anycast"),
								self._parse_bool(block, "drop"),
							)

						else:
							log.warning("Unsupported type: %s" % type)
d7fc3057 1691
dcef2ba4
PM
1692 def _update_overrides_for_aws(self):
1693 # Download Amazon AWS IP allocation file to create overrides...
1694 downloader = location.importer.Downloader()
1695
1696 try:
85e44d9a
MT
1697 # Fetch IP ranges
1698 f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
1699
1700 # Parse downloaded file
1701 aws_ip_dump = json.load(f)
dcef2ba4
PM
1702 except Exception as e:
1703 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1704 return
1705
68fdeb3b
PM
1706 # At this point, we can assume the downloaded file to be valid
1707 self.db.execute("""
1708 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1709 """)
1710
dcef2ba4
PM
1711 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1712 # there seems to be no machine-readable version available of this other than
1713 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1714 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1715 # was helpful here as well.
1716 aws_region_country_map = {
1717 "af-south-1": "ZA",
1718 "ap-east-1": "HK",
1719 "ap-south-1": "IN",
1720 "ap-south-2": "IN",
1721 "ap-northeast-3": "JP",
1722 "ap-northeast-2": "KR",
1723 "ap-southeast-1": "SG",
1724 "ap-southeast-2": "AU",
1725 "ap-southeast-3": "MY",
1726 "ap-southeast-4": "AU",
f0543670 1727 "ap-southeast-5": "NZ", # Auckland, NZ
c9486b13 1728 "ap-southeast-6": "AP", # XXX: Precise location not documented anywhere
dcef2ba4
PM
1729 "ap-northeast-1": "JP",
1730 "ca-central-1": "CA",
c9486b13 1731 "ca-west-1": "CA",
dcef2ba4
PM
1732 "eu-central-1": "DE",
1733 "eu-central-2": "CH",
1734 "eu-west-1": "IE",
1735 "eu-west-2": "GB",
1736 "eu-south-1": "IT",
1737 "eu-south-2": "ES",
1738 "eu-west-3": "FR",
1739 "eu-north-1": "SE",
a0543680 1740 "eusc-de-east-1" : "DE", # XXX: Undocumented, likely located in Berlin rather than Frankfurt
ad017947 1741 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
dcef2ba4
PM
1742 "me-central-1": "AE",
1743 "me-south-1": "BH",
1744 "sa-east-1": "BR"
1745 }
1746
1747 # Fetch all valid country codes to check parsed networks aganist...
1748 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1749 validcountries = []
1750
1751 for row in rows:
1752 validcountries.append(row.country_code)
1753
1754 with self.db.transaction():
1755 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1756 try:
1757 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1758 except ValueError:
1759 log.warning("Unable to parse line: %s" % snetwork)
1760 continue
1761
1762 # Sanitize parsed networks...
1763 if not self._check_parsed_network(network):
1764 continue
1765
1766 # Determine region of this network...
1767 region = snetwork["region"]
1768 cc = None
1769 is_anycast = False
1770
1771 # Any region name starting with "us-" will get "US" country code assigned straight away...
1772 if region.startswith("us-"):
1773 cc = "US"
1774 elif region.startswith("cn-"):
1775 # ... same goes for China ...
1776 cc = "CN"
1777 elif region == "GLOBAL":
1778 # ... funny region name for anycast-like networks ...
1779 is_anycast = True
1780 elif region in aws_region_country_map:
1781 # ... assign looked up country code otherwise ...
1782 cc = aws_region_country_map[region]
1783 else:
1784 # ... and bail out if we are missing something here
1785 log.warning("Unable to determine country code for line: %s" % snetwork)
1786 continue
1787
1788 # Skip networks with unknown country codes
1789 if not is_anycast and validcountries and cc not in validcountries:
1790 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1791 (cc, network))
1792 return
1793
1794 # Conduct SQL statement...
1795 self.db.execute("""
1796 INSERT INTO network_overrides(
1797 network,
1798 country,
1799 source,
1800 is_anonymous_proxy,
1801 is_satellite_provider,
1802 is_anycast
1803 ) VALUES (%s, %s, %s, %s, %s, %s)
1804 ON CONFLICT (network) DO NOTHING""",
1805 "%s" % network,
1806 cc,
1807 "Amazon AWS IP feed",
1808 None,
1809 None,
1810 is_anycast,
1811 )
1812
1813
	def _update_feed_for_spamhaus_drop(self):
		"""
			Imports the Spamhaus DROP/EDROP/DROPv6 network feeds into
			network_feeds and the ASN-DROP feed into autnum_feeds.

			A very basic sanity check (minimum line count) guards
			against CDN errors replacing a feed with a bogus short file,
			in which case the previously imported data is kept.
		"""
		downloader = location.importer.Downloader()

		# Plain-text network feeds: (source name, URL)
		ip_lists = [
			("SPAMHAUS-DROP", "https://www.spamhaus.org/drop/drop.txt"),
			("SPAMHAUS-EDROP", "https://www.spamhaus.org/drop/edrop.txt"),
			("SPAMHAUS-DROPV6", "https://www.spamhaus.org/drop/dropv6.txt")
		]

		# JSON-lines ASN feeds: (source name, URL)
		asn_lists = [
			("SPAMHAUS-ASNDROP", "https://www.spamhaus.org/drop/asndrop.json")
		]

		for name, url in ip_lists:
			# Fetch IP list from given URL
			f = downloader.retrieve(url)

			# Split into lines
			fcontent = f.readlines()

			with self.db.transaction():
				# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
				# downloads.
				if len(fcontent) > 10:
					self.db.execute("DELETE FROM network_feeds WHERE source = %s", name)
				else:
					log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
					continue

				# Iterate through every line, filter comments and add remaining networks to
				# the override table in case they are valid...
				for sline in fcontent:
					# The response is assumed to be encoded in UTF-8...
					sline = sline.decode("utf-8")

					# Comments start with a semicolon...
					if sline.startswith(";"):
						continue

					# Extract network and ignore anything afterwards...
					try:
						network = ipaddress.ip_network(sline.split()[0], strict=False)
					except ValueError:
						log.error("Unable to parse line: %s" % sline)
						continue

					# Sanitize parsed networks...
					if not self._check_parsed_network(network):
						log.warning("Skipping bogus network found in %s (%s): %s" % \
							(name, url, network))
						continue

					# Conduct SQL statement...
					self.db.execute("""
						INSERT INTO
							network_feeds
						(
							network,
							source,
							is_drop
						)
						VALUES
						(
							%s, %s, %s
						)""", "%s" % network, name, True,
					)

		for name, url in asn_lists:
			# Fetch URL
			f = downloader.retrieve(url)

			# Split into lines
			fcontent = f.readlines()

			with self.db.transaction():
				# Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
				# downloads.
				if len(fcontent) > 10:
					self.db.execute("DELETE FROM autnum_feeds WHERE source = %s", name)
				else:
					log.warning("%s (%s) returned likely bogus file, ignored" % (name, url))
					continue

				# Iterate through every line, filter comments and add remaining ASNs to
				# the override table in case they are valid...
				for sline in fcontent:
					# The response is assumed to be encoded in UTF-8...
					sline = sline.decode("utf-8")

					# Load every line as a JSON object and try to obtain an ASN from it...
					try:
						lineobj = json.loads(sline)
					except json.decoder.JSONDecodeError:
						log.error("Unable to parse line as a JSON object: %s" % sline)
						continue

					# Skip line containing file metadata
					try:
						type = lineobj["type"]

						if type == "metadata":
							continue
					except KeyError:
						pass

					try:
						asn = lineobj["asn"]
						as_name = lineobj["asname"]
					except KeyError:
						log.warning("Unable to extract necessary information from line: %s" % sline)
						continue

					# Filter invalid ASNs...
					if not self._check_parsed_asn(asn):
						log.warning("Skipping bogus ASN found in %s (%s): %s" % \
							(name, url, asn))
						continue

					# Conduct SQL statement...
					self.db.execute("""
						INSERT INTO
							autnum_feeds
						(
							number,
							source,
							is_drop
						)
						VALUES
						(
							%s, %s, %s
						)""", "%s" % asn, name, True,
					)
1946
28d29b7c
MT
1947 @staticmethod
1948 def _parse_bool(block, key):
1949 val = block.get(key)
1950
1951 # There is no point to proceed when we got None
1952 if val is None:
1953 return
1954
1955 # Convert to lowercase
1956 val = val.lower()
1957
1958 # True
1959 if val in ("yes", "1"):
1960 return True
1961
1962 # False
1963 if val in ("no", "0"):
1964 return False
1965
1966 # Default to None
1967 return None
1968
3ce28dea
MT
1969 @property
1970 def countries(self):
1971 # Fetch all valid country codes to check parsed networks aganist
1972 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1973
1974 # Return all countries
1975 return [row.country_code for row in rows]
1976
8084b33a
MT
1977 def handle_import_countries(self, ns):
1978 with self.db.transaction():
1979 # Drop all data that we have
1980 self.db.execute("TRUNCATE TABLE countries")
1981
1982 for file in ns.file:
1983 for line in file:
1984 line = line.rstrip()
1985
1986 # Ignore any comments
1987 if line.startswith("#"):
1988 continue
1989
1990 try:
1991 country_code, continent_code, name = line.split(maxsplit=2)
1992 except:
1993 log.warning("Could not parse line: %s" % line)
1994 continue
1995
1996 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1997 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1998
6ffd06b5
MT
1999
def split_line(line):
	"""
		Splits a "key: value" line into a (key, value) tuple.

		Surrounding whitespace is stripped from both parts; when there
		is no colon, the value is an empty string.
	"""
	key, _, value = line.partition(":")

	return key.strip(), value.strip()
78ff0cf2
MT
2008
def main():
	"""
		Entry point: runs the command line interface.
	"""
	c = CLI()
	c.run()

# Guard the entry point so importing this module does not start the CLI
if __name__ == "__main__":
	main()