1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import json
23 import logging
24 import math
25 import re
26 import socket
27 import sys
28 import telnetlib
29 import urllib.error
30
31 # Load our location module
32 import location
33 import location.database
34 import location.importer
35 from location.i18n import _
36
37 # Initialise logging
38 log = logging.getLogger("location.importer")
39 log.propagate = True
40
41 # Define constants
42 VALID_ASN_RANGES = (
43 (1, 23455),
44 (23457, 64495),
45 (131072, 4199999999),
46 )
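# (The ranges above cover the publicly assignable 16-bit and 32-bit ASNs. The gaps,
# i.e. 0, 23456 (AS_TRANS), 64496 to 131071 and anything above 4199999999, are
# reserved, documentation or private-use ASNs and are not expected in public
# routing data.)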
47
48
49 class CLI(object):
50 def parse_cli(self):
51 parser = argparse.ArgumentParser(
52 description=_("Location Importer Command Line Interface"),
53 )
54 subparsers = parser.add_subparsers()
55
56 # Global configuration flags
57 parser.add_argument("--debug", action="store_true",
58 help=_("Enable debug output"))
59 parser.add_argument("--quiet", action="store_true",
60 help=_("Enable quiet mode"))
61
62 # version
63 parser.add_argument("--version", action="version",
64 version="%(prog)s @VERSION@")
65
66 # Database
67 parser.add_argument("--database-host", required=True,
68 help=_("Database Hostname"), metavar=_("HOST"))
69 parser.add_argument("--database-name", required=True,
70 help=_("Database Name"), metavar=_("NAME"))
71 parser.add_argument("--database-username", required=True,
72 help=_("Database Username"), metavar=_("USERNAME"))
73 parser.add_argument("--database-password", required=True,
74 help=_("Database Password"), metavar=_("PASSWORD"))
75
76 # Write Database
77 write = subparsers.add_parser("write", help=_("Write database to file"))
78 write.set_defaults(func=self.handle_write)
79 write.add_argument("file", nargs=1, help=_("Database File"))
80 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
81 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
82 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
83 write.add_argument("--description", nargs="?", help=_("Sets a description"))
84 write.add_argument("--license", nargs="?", help=_("Sets the license"))
85 write.add_argument("--version", type=int, help=_("Database Format Version"))
86
87 # Update WHOIS
88 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
89 update_whois.set_defaults(func=self.handle_update_whois)
90
91 # Update announcements
92 update_announcements = subparsers.add_parser("update-announcements",
93 help=_("Update BGP Announcements"))
94 update_announcements.set_defaults(func=self.handle_update_announcements)
95 update_announcements.add_argument("server", nargs=1,
96 help=_("Route Server to connect to"), metavar=_("SERVER"))
97
98 # Update overrides
99 update_overrides = subparsers.add_parser("update-overrides",
100 help=_("Update overrides"),
101 )
102 update_overrides.add_argument(
103 "files", nargs="+", help=_("Files to import"),
104 )
105 update_overrides.set_defaults(func=self.handle_update_overrides)
106
107 # Import countries
108 import_countries = subparsers.add_parser("import-countries",
109 help=_("Import countries"),
110 )
111 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
112 help=_("File to import"))
113 import_countries.set_defaults(func=self.handle_import_countries)
114
115 args = parser.parse_args()
116
117 # Configure logging
118 if args.debug:
119 location.logger.set_level(logging.DEBUG)
120 elif args.quiet:
121 location.logger.set_level(logging.WARNING)
122
123 # Print usage if no action was given
124 if not "func" in args:
125 parser.print_usage()
126 sys.exit(2)
127
128 return args
129
130 def run(self):
131 # Parse command line arguments
132 args = self.parse_cli()
133
134 # Initialise database
135 self.db = self._setup_database(args)
136
137 # Call function
138 ret = args.func(args)
139
140 # Return with exit code
141 if ret:
142 sys.exit(ret)
143
144 # Otherwise just exit
145 sys.exit(0)
146
147 def _setup_database(self, ns):
148 """
149 Initialise the database
150 """
151 # Connect to database
152 db = location.database.Connection(
153 host=ns.database_host, database=ns.database_name,
154 user=ns.database_username, password=ns.database_password,
155 )
156
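# The statements below are idempotent: "CREATE ... IF NOT EXISTS" sets up a
# fresh schema, while the "ALTER TABLE ... ADD COLUMN IF NOT EXISTS" statements
# act as lightweight migrations for databases created by earlier versions of
# this script.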
157 with db.transaction():
158 db.execute("""
159 -- announcements
160 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
161 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
162 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
163 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
164 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
165 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
166
167 -- autnums
168 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
169 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
170 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
171
172 -- countries
173 CREATE TABLE IF NOT EXISTS countries(
174 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
175 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
176
177 -- networks
178 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
180 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
181 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
182 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
183 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
184
185 -- overrides
186 CREATE TABLE IF NOT EXISTS autnum_overrides(
187 number bigint NOT NULL,
188 name text,
189 country text,
190 is_anonymous_proxy boolean,
191 is_satellite_provider boolean,
192 is_anycast boolean
193 );
194 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
195 ON autnum_overrides(number);
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
197 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
198
199 CREATE TABLE IF NOT EXISTS network_overrides(
200 network inet NOT NULL,
201 country text,
202 is_anonymous_proxy boolean,
203 is_satellite_provider boolean,
204 is_anycast boolean
205 );
206 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
207 ON network_overrides(network);
208 CREATE INDEX IF NOT EXISTS network_overrides_search
209 ON network_overrides USING GIST(network inet_ops);
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
211 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
212 """)
213
214 return db
215
216 def handle_write(self, ns):
217 """
218 Compiles a database in libloc format out of what is in the database
219 """
220 # Allocate a writer
221 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
222
223 # Set all metadata
224 if ns.vendor:
225 writer.vendor = ns.vendor
226
227 if ns.description:
228 writer.description = ns.description
229
230 if ns.license:
231 writer.license = ns.license
232
233 # Add all Autonomous Systems
234 log.info("Writing Autonomous Systems...")
235
236 # Select all ASes with a name
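# (The COALESCE below prefers a manually set name from autnum_overrides over
# the name imported from RIR data; ASes with an empty name are filtered out.)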
237 rows = self.db.query("""
238 SELECT
239 autnums.number AS number,
240 COALESCE(
241 (SELECT overrides.name FROM autnum_overrides overrides
242 WHERE overrides.number = autnums.number),
243 autnums.name
244 ) AS name
245 FROM autnums
246 WHERE name <> %s ORDER BY number
247 """, "")
248
249 for row in rows:
250 a = writer.add_as(row.number)
251 a.name = row.name
252
253 # Add all networks
254 log.info("Writing networks...")
255
256 # Select all known networks
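# The query below works in three steps: "known_networks" collects every
# distinct prefix we have seen anywhere (announcements, RIR data, overrides),
# "ordered_networks" attaches the most specific matching announcement and RIR
# network to each of them, and the final SELECT keeps one row per prefix
# (DISTINCT ON) while resolving country and flags with the precedence
# network override, then autnum override, then RIR data.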
257 rows = self.db.query("""
258 WITH known_networks AS (
259 SELECT network FROM announcements
260 UNION
261 SELECT network FROM networks
262 UNION
263 SELECT network FROM network_overrides
264 ),
265
266 ordered_networks AS (
267 SELECT
268 known_networks.network AS network,
269 announcements.autnum AS autnum,
270 networks.country AS country,
271
272 -- Must be part of returned values for ORDER BY clause
273 masklen(announcements.network) AS sort_a,
274 masklen(networks.network) AS sort_b
275 FROM
276 known_networks
277 LEFT JOIN
278 announcements ON known_networks.network <<= announcements.network
279 LEFT JOIN
280 networks ON known_networks.network <<= networks.network
281 ORDER BY
282 known_networks.network,
283 sort_a DESC,
284 sort_b DESC
285 )
286
287 -- Return a list of those networks enriched with all
288 -- other information that we store in the database
289 SELECT
290 DISTINCT ON (network)
291 network,
292 autnum,
293
294 -- Country
295 COALESCE(
296 (
297 SELECT country FROM network_overrides overrides
298 WHERE networks.network <<= overrides.network
299 ORDER BY masklen(overrides.network) DESC
300 LIMIT 1
301 ),
302 (
303 SELECT country FROM autnum_overrides overrides
304 WHERE networks.autnum = overrides.number
305 ),
306 networks.country
307 ) AS country,
308
309 -- Flags
310 COALESCE(
311 (
312 SELECT is_anonymous_proxy FROM network_overrides overrides
313 WHERE networks.network <<= overrides.network
314 ORDER BY masklen(overrides.network) DESC
315 LIMIT 1
316 ),
317 (
318 SELECT is_anonymous_proxy FROM autnum_overrides overrides
319 WHERE networks.autnum = overrides.number
320 ),
321 FALSE
322 ) AS is_anonymous_proxy,
323 COALESCE(
324 (
325 SELECT is_satellite_provider FROM network_overrides overrides
326 WHERE networks.network <<= overrides.network
327 ORDER BY masklen(overrides.network) DESC
328 LIMIT 1
329 ),
330 (
331 SELECT is_satellite_provider FROM autnum_overrides overrides
332 WHERE networks.autnum = overrides.number
333 ),
334 FALSE
335 ) AS is_satellite_provider,
336 COALESCE(
337 (
338 SELECT is_anycast FROM network_overrides overrides
339 WHERE networks.network <<= overrides.network
340 ORDER BY masklen(overrides.network) DESC
341 LIMIT 1
342 ),
343 (
344 SELECT is_anycast FROM autnum_overrides overrides
345 WHERE networks.autnum = overrides.number
346 ),
347 FALSE
348 ) AS is_anycast,
349 COALESCE(
350 (
351 SELECT is_drop FROM network_overrides overrides
352 WHERE networks.network <<= overrides.network
353 ORDER BY masklen(overrides.network) DESC
354 LIMIT 1
355 ),
356 (
357 SELECT is_drop FROM autnum_overrides overrides
358 WHERE networks.autnum = overrides.number
359 ),
360 FALSE
361 ) AS is_drop
362 FROM
363 ordered_networks networks
364 """)
365
366 for row in rows:
367 network = writer.add_network(row.network)
368
369 # Save country
370 if row.country:
371 network.country_code = row.country
372
373 # Save ASN
374 if row.autnum:
375 network.asn = row.autnum
376
377 # Set flags
378 if row.is_anonymous_proxy:
379 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
380
381 if row.is_satellite_provider:
382 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
383
384 if row.is_anycast:
385 network.set_flag(location.NETWORK_FLAG_ANYCAST)
386
387 if row.is_drop:
388 network.set_flag(location.NETWORK_FLAG_DROP)
389
390 # Add all countries
391 log.info("Writing countries...")
392 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
393
394 for row in rows:
395 c = writer.add_country(row.country_code)
396 c.continent_code = row.continent_code
397 c.name = row.name
398
399 # Write everything to file
400 log.info("Writing database to file...")
401 for file in ns.file:
402 writer.write(file)
403
404 def handle_update_whois(self, ns):
405 downloader = location.importer.Downloader()
406
407 # Did we run successfully?
408 error = False
409
410 # Fetch all valid country codes to check parsed networks against
411 validcountries = self.countries
412
413 # Iterate over all potential sources
414 for source in sorted(location.importer.SOURCES):
415 with self.db.transaction():
416 # Create some temporary tables to store parsed data
417 self.db.execute("""
418 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL,
419 organization text NOT NULL, source text NOT NULL) ON COMMIT DROP;
420 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
421
422 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL,
423 name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
424 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
425
426 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
427 original_countries text[] NOT NULL, source text NOT NULL)
428 ON COMMIT DROP;
429 CREATE INDEX _rirdata_search ON _rirdata
430 USING BTREE(family(network), masklen(network));
431 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
432 """)
433
434 # Remove all previously imported content
435 self.db.execute("DELETE FROM networks WHERE source = %s", source)
436
437 try:
438 # Fetch WHOIS sources
439 for url in location.importer.WHOIS_SOURCES.get(source, []):
440 for block in downloader.request_blocks(url):
441 self._parse_block(block, source, validcountries)
442
443 # Fetch extended sources
444 for url in location.importer.EXTENDED_SOURCES.get(source, []):
445 for line in downloader.request_lines(url):
446 self._parse_line(line, source, validcountries)
447 except urllib.error.URLError as e:
448 log.error("Could not retrieve data from %s: %s" % (source, e))
449 error = True
450
451 # Continue with the next source
452 continue
453
454 # Process all parsed networks from every RIR we happen to have access to,
455 # insert the largest network chunks into the networks table immediately...
456 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata \
457 ORDER BY family(network)")
458
459 for family in (row.family for row in families):
460 # Fetch the smallest mask length in our data set
461 smallest = self.db.get("""
462 SELECT
463 MIN(
464 masklen(network)
465 ) AS prefix
466 FROM
467 _rirdata
468 WHERE
469 family(network) = %s""",
470 family,
471 )
472
473 # Copy all networks
474 self.db.execute("""
475 INSERT INTO
476 networks
477 (
478 network,
479 country,
480 original_countries,
481 source
482 )
483 SELECT
484 network,
485 country,
486 original_countries,
487 source
488 FROM
489 _rirdata
490 WHERE
491 masklen(network) = %s
492 AND
493 family(network) = %s
494 ON CONFLICT DO
495 NOTHING""",
496 smallest.prefix,
497 family,
498 )
499
500 # ... determine any other prefixes for this network family, ...
501 prefixes = self.db.query("""
502 SELECT
503 DISTINCT masklen(network) AS prefix
504 FROM
505 _rirdata
506 WHERE
507 family(network) = %s
508 ORDER BY
509 masklen(network) ASC
510 OFFSET 1""",
511 family,
512 )
513
514 # ... and insert networks with this prefix in case they provide additional
515 # information (i. e. subnet of a larger chunk with a different country)
516 for prefix in (row.prefix for row in prefixes):
517 self.db.execute("""
518 WITH candidates AS (
519 SELECT
520 _rirdata.network,
521 _rirdata.country,
522 _rirdata.original_countries,
523 _rirdata.source
524 FROM
525 _rirdata
526 WHERE
527 family(_rirdata.network) = %s
528 AND
529 masklen(_rirdata.network) = %s
530 ),
531 filtered AS (
532 SELECT
533 DISTINCT ON (c.network)
534 c.network,
535 c.country,
536 c.original_countries,
537 c.source,
538 masklen(networks.network),
539 networks.country AS parent_country
540 FROM
541 candidates c
542 LEFT JOIN
543 networks
544 ON
545 c.network << networks.network
546 ORDER BY
547 c.network,
548 masklen(networks.network) DESC NULLS LAST
549 )
550 INSERT INTO
551 networks(network, country, original_countries, source)
552 SELECT
553 network,
554 country,
555 original_countries,
556 source
557 FROM
558 filtered
559 WHERE
560 parent_country IS NULL
561 OR
562 country <> parent_country
563 ON CONFLICT DO NOTHING""",
564 family, prefix,
565 )
566
567 self.db.execute("""
568 INSERT INTO autnums(number, name, source)
569 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
570 JOIN _organizations ON _autnums.organization = _organizations.handle
571 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
572 """)
573
574 # Download and import (technical) AS names from ARIN
575 with self.db.transaction():
576 self._import_as_names_from_arin()
577
578 # Return a non-zero exit code for errors
579 return 1 if error else 0
580
581 def _check_parsed_network(self, network):
582 """
583 Assistive function to detect and subsequently sort out parsed
584 networks from RIR data (both Whois and so-called "extended sources"),
585 which are or have...
586
587 (a) not globally routable (RFC 1918 space, et al.)
588 (b) covering a too large chunk of the IP address space (prefix length
589 is < 7 for IPv4 networks, and < 10 for IPv6)
590 (c) "0.0.0.0" or "::" as a network address
591 (d) are too small for being publicly announced (we have decided not to
592 process them at the moment, as they significantly enlarge our
593 database without providing very helpful additional information)
594
595 This unfortunately is necessary due to brain-dead clutter across
596 various RIR databases, causing mismatches and eventually disruptions.
597
598 We will return False in case a network is not suitable for adding
599 it to our database, and True otherwise.
600 """
601
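# A few illustrative examples (addresses chosen for illustration only):
# 192.168.0.0/16 is rejected as not globally routable (RFC 1918), a single /25
# such as 81.3.27.0/25 is rejected as too small to be publicly announced,
# while 81.3.27.0/24 passes all checks below.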
602 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
603 return False
604
605 if not network.is_global:
606 log.debug("Skipping non-globally routable network: %s" % network)
607 return False
608
609 if network.version == 4:
610 if network.prefixlen < 7:
611 log.debug("Skipping too big IP chunk: %s" % network)
612 return False
613
614 if network.prefixlen > 24:
615 log.debug("Skipping network too small to be publicly announced: %s" % network)
616 return False
617
618 if str(network.network_address) == "0.0.0.0":
619 log.debug("Skipping network based on 0.0.0.0: %s" % network)
620 return False
621
622 elif network.version == 6:
623 if network.prefixlen < 10:
624 log.debug("Skipping too big IP chunk: %s" % network)
625 return False
626
627 if network.prefixlen > 48:
628 log.debug("Skipping network too small to be publicly announced: %s" % network)
629 return False
630
631 if str(network.network_address) == "::":
632 log.debug("Skipping network based on '::': %s" % network)
633 return False
634
635 else:
636 # This should not happen...
637 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
638 return False
639
640 # In case we have made it here, the network is considered to
641 # be suitable for libloc consumption...
642 return True
643
644 def _check_parsed_asn(self, asn):
645 """
646 Assistive function to filter Autonomous System Numbers not being suitable
647 for adding to our database. Returns False in such cases, and True otherwise.
648 """
649
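# For example, AS64512 (private use) and AS23456 (AS_TRANS) fall outside
# VALID_ASN_RANGES and are rejected, while any ASN within the public 16- or
# 32-bit ranges, such as AS204867, is accepted.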
650 for start, end in VALID_ASN_RANGES:
651 if start <= asn and end >= asn:
652 return True
653
654 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
655 return False
656
657 def _parse_block(self, block, source_key, validcountries = None):
658 # Get first line to find out what type of block this is
659 line = block[0]
660
661 # aut-num
662 if line.startswith("aut-num:"):
663 return self._parse_autnum_block(block, source_key)
664
665 # inetnum
666 if line.startswith("inet6num:") or line.startswith("inetnum:"):
667 return self._parse_inetnum_block(block, source_key, validcountries)
668
669 # organisation
670 elif line.startswith("organisation:"):
671 return self._parse_org_block(block, source_key)
672
673 def _parse_autnum_block(self, block, source_key):
674 autnum = {}
675 for line in block:
676 # Split line
677 key, val = split_line(line)
678
679 if key == "aut-num":
680 m = re.match(r"^(AS|as)(\d+)", val)
681 if m:
682 autnum["asn"] = m.group(2)
683
684 elif key == "org":
685 autnum[key] = val.upper()
686
687 elif key == "descr":
688 # Save the first description line as well...
689 if not key in autnum:
690 autnum[key] = val
691
692 # Skip empty objects
693 if not autnum or not "asn" in autnum:
694 return
695
696 # Insert a dummy organisation handle into our temporary organisations
697 # table in case the AS does not have an organisation handle set, but
698 # has a description (a quirk often observed in APNIC area), so we can
699 # later display at least some string for this AS.
700 if not "org" in autnum:
701 if "descr" in autnum:
702 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
703
704 self.db.execute("INSERT INTO _organizations(handle, name, source) \
705 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
706 autnum.get("org"), autnum.get("descr"), source_key,
707 )
708 else:
709 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
710 autnum.get("asn"))
711 return
712
713 # Insert into database
714 self.db.execute("INSERT INTO _autnums(number, organization, source) \
715 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
716 organization = excluded.organization",
717 autnum.get("asn"), autnum.get("org"), source_key,
718 )
719
720 def _parse_inetnum_block(self, block, source_key, validcountries = None):
721 log.debug("Parsing inetnum block:")
722
723 inetnum = {}
724 for line in block:
725 log.debug(line)
726
727 # Split line
728 key, val = split_line(line)
729
730 # Filter any inetnum records which are only referring to IP space
731 # not managed by that specific RIR...
732 if key == "netname":
733 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
734 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
735 return
736
737 if key == "inetnum":
738 start_address, delim, end_address = val.partition("-")
739
740 # Strip any excess space
741 start_address, end_address = start_address.rstrip(), end_address.strip()
742
743 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
744 if start_address and not (delim or end_address):
745 try:
746 start_address = ipaddress.ip_network(start_address, strict=False)
747 except ValueError:
748 start_address = start_address.split("/")
749 ldigits = start_address[0].count(".")
750
751 # How many octets do we need to add?
752 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
753 if ldigits == 1:
754 start_address = start_address[0] + ".0.0/" + start_address[1]
755 elif ldigits == 2:
756 start_address = start_address[0] + ".0/" + start_address[1]
757 else:
758 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
759 return
760
761 try:
762 start_address = ipaddress.ip_network(start_address, strict=False)
763 except ValueError:
764 log.warning("Could not parse line in LACNIC DB format: %s" % line)
765 return
766
767 # Enumerate first and last IP address of this network
768 end_address = start_address[-1]
769 start_address = start_address[0]
770
771 else:
772 # Convert to IP address
773 try:
774 start_address = ipaddress.ip_address(start_address)
775 end_address = ipaddress.ip_address(end_address)
776 except ValueError:
777 log.warning("Could not parse line: %s" % line)
778 return
779
780 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
781
782 elif key == "inet6num":
783 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
784
785 elif key == "country":
786 val = val.upper()
787
788 # Catch RIR data objects with more than one country code...
789 if not key in inetnum:
790 inetnum[key] = []
791 else:
792 if val in inetnum.get("country"):
793 # ... but keep this list distinct...
794 continue
795
796 # When people set country codes to "UK", they actually mean "GB"
797 if val == "UK":
798 val = "GB"
799
800 inetnum[key].append(val)
801
802 # Skip empty objects
803 if not inetnum or not "country" in inetnum:
804 return
805
806 # Prepare skipping objects with unknown country codes...
807 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
808
809 # Iterate through all networks enumerated from above, check them for plausibility and insert
810 # them into the database, if _check_parsed_network() succeeded
811 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
812 if self._check_parsed_network(single_network):
813
814 # Skip objects with unknown country codes (only done when a list of valid countries is available, to avoid log spam)...
815 if validcountries and invalidcountries:
816 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
817 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
818 break
819
820 # Everything is fine here, run INSERT statement...
821 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
822 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
823 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
824 )
825
826 def _parse_org_block(self, block, source_key):
827 org = {}
828 for line in block:
829 # Split line
830 key, val = split_line(line)
831
832 if key == "organisation":
833 org[key] = val.upper()
834 elif key == "org-name":
835 org[key] = val
836
837 # Skip empty objects
838 if not org:
839 return
840
841 self.db.execute("INSERT INTO _organizations(handle, name, source) \
842 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
843 UPDATE SET name = excluded.name",
844 org.get("organisation"), org.get("org-name"), source_key,
845 )
846
847 def _parse_line(self, line, source_key, validcountries = None):
848 # Skip version line
849 if line.startswith("2"):
850 return
851
852 # Skip comments
853 if line.startswith("#"):
854 return
855
856 try:
857 registry, country_code, type, line = line.split("|", 3)
858 except:
859 log.warning("Could not parse line: %s" % line)
860 return
861
862 # Skip any lines that are for stats only or do not have a country
863 # code at all (avoids log spam below)
864 if not country_code or country_code == '*':
865 return
866
867 # Skip objects with unknown country codes
868 if validcountries and country_code not in validcountries:
869 log.warning("Skipping line with bogus country '%s': %s" % \
870 (country_code, line))
871 return
872
873 if type in ("ipv6", "ipv4"):
874 return self._parse_ip_line(country_code, type, line, source_key)
875
876 def _parse_ip_line(self, country, type, line, source_key):
877 try:
878 address, prefix, date, status, organization = line.split("|")
879 except ValueError:
880 organization = None
881
882 # Try parsing the line without organization
883 try:
884 address, prefix, date, status = line.split("|")
885 except ValueError:
886 log.warning("Unhandled line format: %s" % line)
887 return
888
889 # Skip anything that isn't properly assigned
890 if not status in ("assigned", "allocated"):
891 return
892
893 # Cast prefix into an integer
894 try:
895 prefix = int(prefix)
896 except:
897 log.warning("Invalid prefix: %s" % prefix)
898 return
899
900 # Fix prefix length for IPv4
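# (In the extended delegation format, this field holds the number of addresses
# for IPv4 rather than a prefix length, e.g. 256 addresses become a /24,
# since 32 - log2(256) = 24.)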
901 if type == "ipv4":
902 prefix = 32 - int(math.log(prefix, 2))
903
904 # Try to parse the address
905 try:
906 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
907 except ValueError:
908 log.warning("Invalid IP address: %s" % address)
909 return
910
911 if not self._check_parsed_network(network):
912 return
913
914 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
915 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
916 UPDATE SET country = excluded.country",
917 "%s" % network, country, [country], source_key,
918 )
919
920 def _import_as_names_from_arin(self):
921 downloader = location.importer.Downloader()
922
923 # XXX: Download AS names file from ARIN (note that these names appear to be quite
924 # technical, not intended for human consumption, as description fields in
925 # organisation handles for other RIRs are - however, this is what we have got,
926 # and in some cases, it might still be better than nothing)
927 for line in downloader.request_lines("https://ftp.arin.net/info/asn.txt"):
928 # Valid lines start with a space, followed by the number of the Autonomous System ...
929 if not line.startswith(" "):
930 continue
931
932 # Split line and check if there is a valid ASN in it...
933 asn, name = line.split()[0:2]
934
935 try:
936 asn = int(asn)
937 except ValueError:
938 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
939 continue
940
941 # Filter invalid ASNs...
942 if not self._check_parsed_asn(asn):
943 continue
944
945 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
946 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
947 continue
948
949 # Bail out in case the AS name contains anything we do not expect here...
950 if re.search(r"[^a-zA-Z0-9-_]", name):
951 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
952 (asn, name))
continue
953
954 # Things look good here, run INSERT statement and skip this one if we already have
955 # a (better?) name for this Autonomous System...
956 self.db.execute("""
957 INSERT INTO autnums(
958 number,
959 name,
960 source
961 ) VALUES (%s, %s, %s)
962 ON CONFLICT (number) DO NOTHING""",
963 asn,
964 name,
965 "ARIN",
966 )
967
968 def handle_update_announcements(self, ns):
969 server = ns.server[0]
970
971 with self.db.transaction():
972 if server.startswith("/"):
973 self._handle_update_announcements_from_bird(server)
974 else:
975 self._handle_update_announcements_from_telnet(server)
976
977 # Purge anything we never want here
978 self.db.execute("""
979 -- Delete default routes
980 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
981
982 -- Delete anything that is not global unicast address space
983 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
984
985 -- DELETE "current network" address space
986 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
987
988 -- DELETE local loopback address space
989 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
990
991 -- DELETE RFC 1918 address space
992 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
993 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
994 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
995
996 -- DELETE test, benchmark and documentation address space
997 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
998 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
999 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
1000 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
1001 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
1002
1003 -- DELETE CGNAT address space (RFC 6598)
1004 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
1005
1006 -- DELETE link local address space
1007 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
1008
1009 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
1010 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
1011 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
1012
1013 -- DELETE multicast and reserved address space
1014 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
1015 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
1016
1017 -- Delete networks that are too small to be in the global routing table
1018 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
1019 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
1020
1021 -- Delete any non-public or reserved ASNs
1022 DELETE FROM announcements WHERE NOT (
1023 (autnum >= 1 AND autnum <= 23455)
1024 OR
1025 (autnum >= 23457 AND autnum <= 64495)
1026 OR
1027 (autnum >= 131072 AND autnum <= 4199999999)
1028 );
1029
1030 -- Delete everything that we have not seen for 14 days
1031 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
1032 """)
1033
1034 def _handle_update_announcements_from_bird(self, server):
1035 # Pre-compile the regular expression for faster searching
1036 route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
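# (The first group captures the announced prefix, the second the originating
# AS number from the trailing "[AS...]" tag of a route line that looks roughly
# like " 10.0.0.0/8  via 192.0.2.1 on eth0 [bgp1 2022-01-01] * (100) [AS64496i]"
# - illustrative only. Aggregated routes carry no AS number, leave the second
# group empty and are post-processed further below.)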
1037
1038 log.info("Requesting routing table from Bird (%s)" % server)
1039
1040 aggregated_networks = []
1041
1042 # Send command to list all routes
1043 for line in self._bird_cmd(server, "show route"):
1044 m = route.match(line)
1045 if not m:
1046 # Skip empty lines
1047 if not line:
1048 pass
1049
1050 # Ignore any header lines with the name of the routing table
1051 elif line.startswith(b"Table"):
1052 pass
1053
1054 # Log anything else
1055 else:
1056 log.debug("Could not parse line: %s" % line.decode())
1057
1058 continue
1059
1060 # Fetch the extracted network and ASN
1061 network, autnum = m.groups()
1062
1063 # Decode into strings
1064 if network:
1065 network = network.decode()
1066 if autnum:
1067 autnum = autnum.decode()
1068
1069 # Collect all aggregated networks
1070 if not autnum:
1071 log.debug("%s is an aggregated network" % network)
1072 aggregated_networks.append(network)
1073 continue
1074
1075 # Insert it into the database
1076 self.db.execute("INSERT INTO announcements(network, autnum) \
1077 VALUES(%s, %s) ON CONFLICT (network) DO \
1078 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1079 network, autnum,
1080 )
1081
1082 # Process any aggregated networks
1083 for network in aggregated_networks:
1084 log.debug("Processing aggregated network %s" % network)
1085
1086 # Run "show route all" for each network
1087 for line in self._bird_cmd(server, "show route %s all" % network):
1088 # Try finding the path
1089 m = re.match(rb"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1090 if m:
1091 # Select the last AS number in the path
1092 autnum = m.group(1).decode()
1093
1094 # Insert it into the database
1095 self.db.execute("INSERT INTO announcements(network, autnum) \
1096 VALUES(%s, %s) ON CONFLICT (network) DO \
1097 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1098 network, autnum,
1099 )
1100
1101 # We don't need to process any more
1102 break
1103
1104 def _handle_update_announcements_from_telnet(self, server):
1105 # Pre-compile regular expression for routes
1106 route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
1107
1108 with telnetlib.Telnet(server) as t:
1109 # Enable debug mode
1110 #if ns.debug:
1111 # t.set_debuglevel(10)
1112
1113 # Wait for console greeting
1114 greeting = t.read_until(b"> ", timeout=30)
1115 if not greeting:
1116 log.error("Could not get a console prompt")
1117 return 1
1118
1119 # Disable pagination
1120 t.write(b"terminal length 0\n")
1121
1122 # Wait for the prompt to return
1123 t.read_until(b"> ")
1124
1125 # Fetch the routing tables
1126 for protocol in ("ipv6", "ipv4"):
1127 log.info("Requesting %s routing table" % protocol)
1128
1129 # Request the full unicast routing table
1130 t.write(b"show bgp %s unicast\n" % protocol.encode())
1131
1132 # Read entire header which ends with "Path"
1133 t.read_until(b"Path\r\n")
1134
1135 while True:
1136 # Try reading a full entry
1137 # Those might be broken across multiple lines but end with i
1138 line = t.read_until(b"i\r\n", timeout=5)
1139 if not line:
1140 break
1141
1142 # Show line for debugging
1143 #log.debug(repr(line))
1144
1145 # Try finding a route in here
1146 m = route.match(line)
1147 if m:
1148 network, autnum = m.groups()
1149
1150 # Convert network to string
1151 network = network.decode()
1152
1153 # Append /24 for IPv4 addresses
1154 if not "/" in network and not ":" in network:
1155 network = "%s/24" % network
1156
1157 # Convert AS number to integer
1158 autnum = int(autnum)
1159
1160 log.info("Found announcement for %s by %s" % (network, autnum))
1161
1162 self.db.execute("INSERT INTO announcements(network, autnum) \
1163 VALUES(%s, %s) ON CONFLICT (network) DO \
1164 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1165 network, autnum,
1166 )
1167
1168 log.info("Finished reading the %s routing table" % protocol)
1169
1170 def _bird_cmd(self, socket_path, command):
1171 # Connect to the socket
1172 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1173 s.connect(socket_path)
1174
1175 # Allocate some buffer
1176 buffer = b""
1177
1178 log.debug("Sending Bird command: %s" % command)
1179
1180 # Send the command
1181 s.send(b"%s\n" % command.encode())
1182
1183 while True:
1184 # Fill up the buffer
1185 buffer += s.recv(4096)
1186
1187 while True:
1188 # Search for the next newline
1189 pos = buffer.find(b"\n")
1190
1191 # If we cannot find one, we go back and read more data
1192 if pos <= 0:
1193 break
1194
1195 # Cut after the newline character
1196 pos += 1
1197
1198 # Split the line we want and keep the rest in buffer
1199 line, buffer = buffer[:pos], buffer[pos:]
1200
1201 # Try parsing any status lines
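# (Bird's reply framing resembles SMTP/FTP: a four-digit code followed by "-"
# (byte 45) marks a continuation line, a code followed by " " (byte 32) marks
# the final line of a reply.)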
1202 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1203 code, delim, line = int(line[:4]), line[4], line[5:]
1204
1205 log.debug("Received response code %s from bird" % code)
1206
1207 # End of output
1208 if code == 0:
1209 return
1210
1211 # Ignore hello line
1212 elif code == 1:
1213 continue
1214
1215 # Otherwise return the line
1216 yield line
1217
1218 def handle_update_overrides(self, ns):
1219 with self.db.transaction():
1220 # Only drop manually created overrides, as we can be reasonably sure to have them,
1221 # and preserve the rest. If appropriate, the rest is deleted by the corresponding functions.
1222 self.db.execute("""
1223 DELETE FROM autnum_overrides WHERE source = 'manual';
1224 DELETE FROM network_overrides WHERE source = 'manual';
1225 """)
1226
1227 # Update overrides for various cloud providers big enough to publish their own IP
1228 # network allocation lists in a machine-readable format...
1229 self._update_overrides_for_aws()
1230
1231 # Update overrides for Spamhaus DROP feeds...
1232 self._update_overrides_for_spamhaus_drop()
1233
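# An override file consists of RPSL-style blocks separated by blank lines, for
# example (illustrative only, using documentation prefix and ASN):
#
#   net:          192.0.2.0/24
#   country:      DE
#   is-anycast:   yes
#
#   aut-num:      AS64496
#   name:         Example AS
#   country:      DE
#   drop:         yes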
1234 for file in ns.files:
1235 log.info("Reading %s..." % file)
1236
1237 with open(file, "rb") as f:
1238 for type, block in location.importer.read_blocks(f):
1239 if type == "net":
1240 network = block.get("net")
1241 # Try to parse and normalise the network
1242 try:
1243 network = ipaddress.ip_network(network, strict=False)
1244 except ValueError as e:
1245 log.warning("Invalid IP network: %s: %s" % (network, e))
1246 continue
1247
1248 # Prevent that we overwrite all networks
1249 if network.prefixlen == 0:
1250 log.warning("Skipping %s: You cannot overwrite default" % network)
1251 continue
1252
1253 self.db.execute("""
1254 INSERT INTO network_overrides(
1255 network,
1256 country,
1257 source,
1258 is_anonymous_proxy,
1259 is_satellite_provider,
1260 is_anycast,
1261 is_drop
1262 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1263 ON CONFLICT (network) DO NOTHING""",
1264 "%s" % network,
1265 block.get("country"),
1266 "manual",
1267 self._parse_bool(block, "is-anonymous-proxy"),
1268 self._parse_bool(block, "is-satellite-provider"),
1269 self._parse_bool(block, "is-anycast"),
1270 self._parse_bool(block, "drop"),
1271 )
1272
1273 elif type == "aut-num":
1274 autnum = block.get("aut-num")
1275
1276 # Check if AS number begins with "AS"
1277 if not autnum.startswith("AS"):
1278 log.warning("Invalid AS number: %s" % autnum)
1279 continue
1280
1281 # Strip "AS"
1282 autnum = autnum[2:]
1283
1284 self.db.execute("""
1285 INSERT INTO autnum_overrides(
1286 number,
1287 name,
1288 country,
1289 source,
1290 is_anonymous_proxy,
1291 is_satellite_provider,
1292 is_anycast,
1293 is_drop
1294 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1295 ON CONFLICT DO NOTHING""",
1296 autnum,
1297 block.get("name"),
1298 block.get("country"),
1299 "manual",
1300 self._parse_bool(block, "is-anonymous-proxy"),
1301 self._parse_bool(block, "is-satellite-provider"),
1302 self._parse_bool(block, "is-anycast"),
1303 self._parse_bool(block, "drop"),
1304 )
1305
1306 else:
1307 log.warning("Unsupported type: %s" % type)
1308
1309 def _update_overrides_for_aws(self):
1310 # Download Amazon AWS IP allocation file to create overrides...
1311 downloader = location.importer.Downloader()
1312
1313 try:
1314 # Fetch IP ranges
1315 f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
1316
1317 # Parse downloaded file
1318 aws_ip_dump = json.load(f)
1319 except Exception as e:
1320 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1321 return
1322
1323 # At this point, we can assume the downloaded file to be valid
1324 self.db.execute("""
1325 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1326 """)
1327
1328 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1329 # there seems to be no machine-readable version available of this other than
1330 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1331 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1332 # was helpful here as well.
1333 aws_region_country_map = {
1334 "af-south-1": "ZA",
1335 "ap-east-1": "HK",
1336 "ap-south-1": "IN",
1337 "ap-south-2": "IN",
1338 "ap-northeast-3": "JP",
1339 "ap-northeast-2": "KR",
1340 "ap-southeast-1": "SG",
1341 "ap-southeast-2": "AU",
1342 "ap-southeast-3": "MY",
1343 "ap-southeast-4": "AU",
1344 "ap-northeast-1": "JP",
1345 "ca-central-1": "CA",
1346 "eu-central-1": "DE",
1347 "eu-central-2": "CH",
1348 "eu-west-1": "IE",
1349 "eu-west-2": "GB",
1350 "eu-south-1": "IT",
1351 "eu-south-2": "ES",
1352 "eu-west-3": "FR",
1353 "eu-north-1": "SE",
1354 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
1355 "me-central-1": "AE",
1356 "me-south-1": "BH",
1357 "sa-east-1": "BR"
1358 }
1359
1360 # Fetch all valid country codes to check parsed networks against...
1361 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1362 validcountries = []
1363
1364 for row in rows:
1365 validcountries.append(row.country_code)
1366
1367 with self.db.transaction():
1368 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1369 try:
1370 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1371 except ValueError:
1372 log.warning("Unable to parse line: %s" % snetwork)
1373 continue
1374
1375 # Sanitize parsed networks...
1376 if not self._check_parsed_network(network):
1377 continue
1378
1379 # Determine region of this network...
1380 region = snetwork["region"]
1381 cc = None
1382 is_anycast = False
1383
1384 # Any region name starting with "us-" will get "US" country code assigned straight away...
1385 if region.startswith("us-"):
1386 cc = "US"
1387 elif region.startswith("cn-"):
1388 # ... same goes for China ...
1389 cc = "CN"
1390 elif region == "GLOBAL":
1391 # ... funny region name for anycast-like networks ...
1392 is_anycast = True
1393 elif region in aws_region_country_map:
1394 # ... assign looked up country code otherwise ...
1395 cc = aws_region_country_map[region]
1396 else:
1397 # ... and bail out if we are missing something here
1398 log.warning("Unable to determine country code for line: %s" % snetwork)
1399 continue
1400
1401 # Skip networks with unknown country codes
1402 if not is_anycast and validcountries and cc not in validcountries:
1403 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1404 (cc, network))
1405 continue
1406
1407 # Conduct SQL statement...
1408 self.db.execute("""
1409 INSERT INTO network_overrides(
1410 network,
1411 country,
1412 source,
1413 is_anonymous_proxy,
1414 is_satellite_provider,
1415 is_anycast
1416 ) VALUES (%s, %s, %s, %s, %s, %s)
1417 ON CONFLICT (network) DO NOTHING""",
1418 "%s" % network,
1419 cc,
1420 "Amazon AWS IP feed",
1421 None,
1422 None,
1423 is_anycast,
1424 )
1425
1426
1427 def _update_overrides_for_spamhaus_drop(self):
1428 downloader = location.importer.Downloader()
1429
1430 ip_urls = [
1431 "https://www.spamhaus.org/drop/drop.txt",
1432 "https://www.spamhaus.org/drop/edrop.txt",
1433 "https://www.spamhaus.org/drop/dropv6.txt"
1434 ]
1435
1436 asn_urls = [
1437 "https://www.spamhaus.org/drop/asndrop.txt"
1438 ]
1439
1440 for url in ip_urls:
1441 # Fetch IP list
1442 f = downloader.retrieve(url)
1443
1444 # Split into lines
1445 fcontent = f.readlines()
1446
1447 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1448 # downloads.
1449 if len(fcontent) > 10:
1450 self.db.execute("""
1451 DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
1452 DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
1453 """)
1454 else:
1455 log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
1456 continue
1457
1458 # Iterate through every line, filter comments and add remaining networks to
1459 # the override table in case they are valid...
1460 with self.db.transaction():
1461 for sline in fcontent:
1462 # The response is assumed to be encoded in UTF-8...
1463 sline = sline.decode("utf-8")
1464
1465 # Comments start with a semicolon...
1466 if sline.startswith(";"):
1467 continue
1468
1469 # Extract network and ignore anything afterwards...
1470 try:
1471 network = ipaddress.ip_network(sline.split()[0], strict=False)
1472 except ValueError:
1473 log.error("Unable to parse line: %s" % sline)
1474 continue
1475
1476 # Sanitize parsed networks...
1477 if not self._check_parsed_network(network):
1478 log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1479 (url, network))
1480 continue
1481
1482 # Conduct SQL statement...
1483 self.db.execute("""
1484 INSERT INTO network_overrides(
1485 network,
1486 source,
1487 is_drop
1488 ) VALUES (%s, %s, %s)
1489 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1490 "%s" % network,
1491 "Spamhaus DROP lists",
1492 True
1493 )
1494
1495 for url in asn_urls:
1496 # Fetch URL
1497 f = downloader.retrieve(url)
1498
1499 # Iterate through every line, filter comments and add remaining ASNs to
1500 # the override table in case they are valid...
1501 with self.db.transaction():
1502 for sline in f.readlines():
1503 # The response is assumed to be encoded in UTF-8...
1504 sline = sline.decode("utf-8")
1505
1506 # Comments start with a semicolon...
1507 if sline.startswith(";"):
1508 continue
1509
1510 # Throw away anything after the first space...
1511 sline = sline.split()[0]
1512
1513 # ... strip the "AS" prefix from it ...
1514 sline = sline.strip("AS")
1515
1516 # ... and convert it into an integer. Voila.
1517 asn = int(sline)
1518
1519 # Filter invalid ASNs...
1520 if not self._check_parsed_asn(asn):
1521 log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1522 (url, asn))
1523 continue
1524
1525 # Conduct SQL statement...
1526 self.db.execute("""
1527 INSERT INTO autnum_overrides(
1528 number,
1529 source,
1530 is_drop
1531 ) VALUES (%s, %s, %s)
1532 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1533 "%s" % asn,
1534 "Spamhaus ASN-DROP list",
1535 True
1536 )
1537
1538 @staticmethod
1539 def _parse_bool(block, key):
1540 val = block.get(key)
1541
1542 # There is no point to proceed when we got None
1543 if val is None:
1544 return
1545
1546 # Convert to lowercase
1547 val = val.lower()
1548
1549 # True
1550 if val in ("yes", "1"):
1551 return True
1552
1553 # False
1554 if val in ("no", "0"):
1555 return False
1556
1557 # Default to None
1558 return None
1559
1560 @property
1561 def countries(self):
1562 # Fetch all valid country codes to check parsed networks against
1563 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1564
1565 # Return all countries
1566 return [row.country_code for row in rows]
1567
1568 def handle_import_countries(self, ns):
1569 with self.db.transaction():
1570 # Drop all data that we have
1571 self.db.execute("TRUNCATE TABLE countries")
1572
1573 for file in ns.file:
1574 for line in file:
1575 line = line.rstrip()
1576
1577 # Ignore any comments
1578 if line.startswith("#"):
1579 continue
1580
1581 try:
1582 country_code, continent_code, name = line.split(maxsplit=2)
1583 except:
1584 log.warning("Could not parse line: %s" % line)
1585 continue
1586
1587 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1588 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1589
1590
1591 def split_line(line):
1592 key, colon, val = line.partition(":")
1593
1594 # Strip any excess space
1595 key = key.strip()
1596 val = val.strip()
1597
1598 return key, val
1599
1600 def main():
1601 # Run the command line interface
1602 c = CLI()
1603 c.run()
1604
1605 main()
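# Example invocations (illustrative only; the installed command name, connection
# parameters and file paths are placeholders for a local setup):
#
#   location-importer --database-host localhost --database-name location \
#       --database-username location --database-password secret update-whois
#
#   location-importer --database-host localhost --database-name location \
#       --database-username location --database-password secret \
#       write --vendor "Example" /tmp/location.db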