1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import json
23 import logging
24 import math
25 import re
26 import socket
27 import sys
28 import telnetlib
29
30 # Load our location module
31 import location
32 import location.database
33 import location.importer
34 from location.i18n import _
35
36 # Initialise logging
37 log = logging.getLogger("location.importer")
38 log.propagate = 1
39
40 # Define constants
41 VALID_ASN_RANGES = (
42 (1, 23455),
43 (23457, 64495),
44 (131072, 4199999999),
45 )
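# These ranges roughly correspond to the publicly assignable 16-bit and 32-bit
# ASN space, leaving out AS_TRANS (23456) as well as the reserved, private-use
# and documentation ranges (assumed rationale; see _check_parsed_asn() below).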
46
47
48 class CLI(object):
49 def parse_cli(self):
50 parser = argparse.ArgumentParser(
51 description=_("Location Importer Command Line Interface"),
52 )
53 subparsers = parser.add_subparsers()
54
55 # Global configuration flags
56 parser.add_argument("--debug", action="store_true",
57 help=_("Enable debug output"))
58 parser.add_argument("--quiet", action="store_true",
59 help=_("Enable quiet mode"))
60
61 # version
62 parser.add_argument("--version", action="version",
63 version="%(prog)s @VERSION@")
64
65 # Database
66 parser.add_argument("--database-host", required=True,
67 help=_("Database Hostname"), metavar=_("HOST"))
68 parser.add_argument("--database-name", required=True,
69 help=_("Database Name"), metavar=_("NAME"))
70 parser.add_argument("--database-username", required=True,
71 help=_("Database Username"), metavar=_("USERNAME"))
72 parser.add_argument("--database-password", required=True,
73 help=_("Database Password"), metavar=_("PASSWORD"))
74
75 # Write Database
76 write = subparsers.add_parser("write", help=_("Write database to file"))
77 write.set_defaults(func=self.handle_write)
78 write.add_argument("file", nargs=1, help=_("Database File"))
79 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
80 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
81 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
82 write.add_argument("--description", nargs="?", help=_("Sets a description"))
83 write.add_argument("--license", nargs="?", help=_("Sets the license"))
84 write.add_argument("--version", type=int, help=_("Database Format Version"))
85
86 # Update WHOIS
87 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois.set_defaults(func=self.handle_update_whois)
89
90 # Update announcements
91 update_announcements = subparsers.add_parser("update-announcements",
92 help=_("Update BGP Announcements"))
93 update_announcements.set_defaults(func=self.handle_update_announcements)
94 update_announcements.add_argument("server", nargs=1,
95 help=_("Route Server to connect to"), metavar=_("SERVER"))
96
97 # Update overrides
98 update_overrides = subparsers.add_parser("update-overrides",
99 help=_("Update overrides"),
100 )
101 update_overrides.add_argument(
102 "files", nargs="+", help=_("Files to import"),
103 )
104 update_overrides.set_defaults(func=self.handle_update_overrides)
105
106 # Import countries
107 import_countries = subparsers.add_parser("import-countries",
108 help=_("Import countries"),
109 )
110 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
111 help=_("File to import"))
112 import_countries.set_defaults(func=self.handle_import_countries)
113
114 args = parser.parse_args()
115
116 # Configure logging
117 if args.debug:
118 location.logger.set_level(logging.DEBUG)
119 elif args.quiet:
120 location.logger.set_level(logging.WARNING)
121
122 # Print usage if no action was given
123 if not "func" in args:
124 parser.print_usage()
125 sys.exit(2)
126
127 return args
128
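# For illustration only - a typical invocation, assuming a local PostgreSQL
# instance and hypothetical credentials, might look like:
#
#   location-importer --database-host localhost --database-name location \
#       --database-username location --database-password secret update-whois
#
# (The actual script name, credentials and subcommand depend on the deployment.)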
129 def run(self):
130 # Parse command line arguments
131 args = self.parse_cli()
132
133 # Initialise database
134 self.db = self._setup_database(args)
135
136 # Call function
137 ret = args.func(args)
138
139 # Return with exit code
140 if ret:
141 sys.exit(ret)
142
143 # Otherwise just exit
144 sys.exit(0)
145
146 def _setup_database(self, ns):
147 """
148 Initialise the database
149 """
150 # Connect to database
151 db = location.database.Connection(
152 host=ns.database_host, database=ns.database_name,
153 user=ns.database_username, password=ns.database_password,
154 )
155
156 with db.transaction():
157 db.execute("""
158 -- announcements
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
165
166 -- autnums
167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
170
171 -- countries
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
175
176 -- networks
177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
183
184 -- overrides
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
187 name text,
188 country text,
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
191 is_anycast boolean
192 );
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
197
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
200 country text,
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
203 is_anycast boolean
204 );
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
211 """)
212
213 return db
214
215 def handle_write(self, ns):
216 """
217 Compiles a database in libloc format out of what is in the database
218 """
219 # Allocate a writer
220 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
221
222 # Set all metadata
223 if ns.vendor:
224 writer.vendor = ns.vendor
225
226 if ns.description:
227 writer.description = ns.description
228
229 if ns.license:
230 writer.license = ns.license
231
232 # Add all Autonomous Systems
233 log.info("Writing Autonomous Systems...")
234
235 # Select all ASes with a name
236 rows = self.db.query("""
237 SELECT
238 autnums.number AS number,
239 COALESCE(
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
242 autnums.name
243 ) AS name
244 FROM autnums
245 WHERE name <> %s ORDER BY number
246 """, "")
247
248 for row in rows:
249 a = writer.add_as(row.number)
250 a.name = row.name
251
252 # Add all networks
253 log.info("Writing networks...")
254
255 # Select all known networks
256 rows = self.db.query("""
257 WITH known_networks AS (
258 SELECT network FROM announcements
259 UNION
260 SELECT network FROM networks
261 UNION
262 SELECT network FROM network_overrides
263 ),
264
265 ordered_networks AS (
266 SELECT
267 known_networks.network AS network,
268 announcements.autnum AS autnum,
269 networks.country AS country,
270
271 -- Must be part of returned values for ORDER BY clause
272 masklen(announcements.network) AS sort_a,
273 masklen(networks.network) AS sort_b
274 FROM
275 known_networks
276 LEFT JOIN
277 announcements ON known_networks.network <<= announcements.network
278 LEFT JOIN
279 networks ON known_networks.network <<= networks.network
280 ORDER BY
281 known_networks.network,
282 sort_a DESC,
283 sort_b DESC
284 )
285
286 -- Return a list of those networks enriched with all
287 -- other information that we store in the database
288 SELECT
289 DISTINCT ON (network)
290 network,
291 autnum,
292
293 -- Country
294 COALESCE(
295 (
296 SELECT country FROM network_overrides overrides
297 WHERE networks.network <<= overrides.network
298 ORDER BY masklen(overrides.network) DESC
299 LIMIT 1
300 ),
301 (
302 SELECT country FROM autnum_overrides overrides
303 WHERE networks.autnum = overrides.number
304 ),
305 networks.country
306 ) AS country,
307
308 -- Flags
309 COALESCE(
310 (
311 SELECT is_anonymous_proxy FROM network_overrides overrides
312 WHERE networks.network <<= overrides.network
313 ORDER BY masklen(overrides.network) DESC
314 LIMIT 1
315 ),
316 (
317 SELECT is_anonymous_proxy FROM autnum_overrides overrides
318 WHERE networks.autnum = overrides.number
319 ),
320 FALSE
321 ) AS is_anonymous_proxy,
322 COALESCE(
323 (
324 SELECT is_satellite_provider FROM network_overrides overrides
325 WHERE networks.network <<= overrides.network
326 ORDER BY masklen(overrides.network) DESC
327 LIMIT 1
328 ),
329 (
330 SELECT is_satellite_provider FROM autnum_overrides overrides
331 WHERE networks.autnum = overrides.number
332 ),
333 FALSE
334 ) AS is_satellite_provider,
335 COALESCE(
336 (
337 SELECT is_anycast FROM network_overrides overrides
338 WHERE networks.network <<= overrides.network
339 ORDER BY masklen(overrides.network) DESC
340 LIMIT 1
341 ),
342 (
343 SELECT is_anycast FROM autnum_overrides overrides
344 WHERE networks.autnum = overrides.number
345 ),
346 FALSE
347 ) AS is_anycast,
348 COALESCE(
349 (
350 SELECT is_drop FROM network_overrides overrides
351 WHERE networks.network <<= overrides.network
352 ORDER BY masklen(overrides.network) DESC
353 LIMIT 1
354 ),
355 (
356 SELECT is_drop FROM autnum_overrides overrides
357 WHERE networks.autnum = overrides.number
358 ),
359 FALSE
360 ) AS is_drop
361 FROM
362 ordered_networks networks
363 """)
364
365 for row in rows:
366 network = writer.add_network(row.network)
367
368 # Save country
369 if row.country:
370 network.country_code = row.country
371
372 # Save ASN
373 if row.autnum:
374 network.asn = row.autnum
375
376 # Set flags
377 if row.is_anonymous_proxy:
378 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
379
380 if row.is_satellite_provider:
381 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
382
383 if row.is_anycast:
384 network.set_flag(location.NETWORK_FLAG_ANYCAST)
385
386 if row.is_drop:
387 network.set_flag(location.NETWORK_FLAG_DROP)
388
389 # Add all countries
390 log.info("Writing countries...")
391 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
392
393 for row in rows:
394 c = writer.add_country(row.country_code)
395 c.continent_code = row.continent_code
396 c.name = row.name
397
398 # Write everything to file
399 log.info("Writing database to file...")
400 for file in ns.file:
401 writer.write(file)
402
403 def handle_update_whois(self, ns):
404 downloader = location.importer.Downloader()
405
406 # Download all sources
407 with self.db.transaction():
408 # Create some temporary tables to store parsed data
409 self.db.execute("""
410 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
411 ON COMMIT DROP;
412 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
413
414 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
415 ON COMMIT DROP;
416 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
417
418 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
419 ON COMMIT DROP;
420 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
421 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
422 """)
423
424 # Remove all previously imported content
425 self.db.execute("""
426 TRUNCATE TABLE networks;
427 """)
428
429 # Fetch all valid country codes to check parsed networks against...
430 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
431 validcountries = []
432
433 for row in rows:
434 validcountries.append(row.country_code)
435
436 for source_key in location.importer.WHOIS_SOURCES:
437 for single_url in location.importer.WHOIS_SOURCES[source_key]:
438 with downloader.request(single_url, return_blocks=True) as f:
439 for block in f:
440 self._parse_block(block, source_key, validcountries)
441
442 # Process all parsed networks from every RIR we happen to have access to,
443 # insert the largest network chunks into the networks table immediately...
444 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
445
446 for family in (row.family for row in families):
447 smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
448
449 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
450 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
451
452 # ... determine any other prefixes for this network family, ...
453 prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
454 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
455
456 # ... and insert networks with this prefix in case they provide additional
457 # information (i.e. a subnet of a larger chunk with a different country)
458 for prefix in (row.prefix for row in prefixes):
459 self.db.execute("""
460 WITH candidates AS (
461 SELECT
462 _rirdata.network,
463 _rirdata.country,
464 _rirdata.original_countries,
465 _rirdata.source
466 FROM
467 _rirdata
468 WHERE
469 family(_rirdata.network) = %s
470 AND
471 masklen(_rirdata.network) = %s
472 ),
473 filtered AS (
474 SELECT
475 DISTINCT ON (c.network)
476 c.network,
477 c.country,
478 c.original_countries,
479 c.source,
480 masklen(networks.network),
481 networks.country AS parent_country
482 FROM
483 candidates c
484 LEFT JOIN
485 networks
486 ON
487 c.network << networks.network
488 ORDER BY
489 c.network,
490 masklen(networks.network) DESC NULLS LAST
491 )
492 INSERT INTO
493 networks(network, country, original_countries, source)
494 SELECT
495 network,
496 country,
497 original_countries,
498 source
499 FROM
500 filtered
501 WHERE
502 parent_country IS NULL
503 OR
504 country <> parent_country
505 ON CONFLICT DO NOTHING""",
506 family, prefix,
507 )
508
509 self.db.execute("""
510 INSERT INTO autnums(number, name, source)
511 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
512 JOIN _organizations ON _autnums.organization = _organizations.handle
513 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
514 """)
515
516 # Download all extended sources
517 for source_key in location.importer.EXTENDED_SOURCES:
518 for single_url in location.importer.EXTENDED_SOURCES[source_key]:
519 with self.db.transaction():
520 # Download data
521 with downloader.request(single_url) as f:
522 for line in f:
523 self._parse_line(line, source_key, validcountries)
524
525 # Download and import (technical) AS names from ARIN
526 self._import_as_names_from_arin()
527
528 def _check_parsed_network(self, network):
529 """
530 Assistive function to detect and subsequently sort out parsed
531 networks from RIR data (both WHOIS and so-called "extended sources")
532 which...
533 
534 (a) are not globally routable (RFC 1918 space, et al.),
535 (b) cover too large a chunk of the IP address space (prefix length
536 is < 7 for IPv4 networks, and < 10 for IPv6),
537 (c) have "0.0.0.0" or "::" as their network address, or
538 (d) are too small to be publicly announced (we have decided not to
539 process them at the moment, as they significantly enlarge our
540 database without providing very helpful additional information).
541
542 This unfortunately is necessary due to brain-dead clutter across
543 various RIR databases, causing mismatches and eventually disruptions.
544
545 Returns False if a network is not suitable for adding to our
546 database, and True otherwise.
547 """
548
549 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
550 return False
551
552 if not network.is_global:
553 log.debug("Skipping non-globally routable network: %s" % network)
554 return False
555
556 if network.version == 4:
557 if network.prefixlen < 7:
558 log.debug("Skipping too big IP chunk: %s" % network)
559 return False
560
561 if network.prefixlen > 24:
562 log.debug("Skipping network too small to be publicly announced: %s" % network)
563 return False
564
565 if str(network.network_address) == "0.0.0.0":
566 log.debug("Skipping network based on 0.0.0.0: %s" % network)
567 return False
568
569 elif network.version == 6:
570 if network.prefixlen < 10:
571 log.debug("Skipping too big IP chunk: %s" % network)
572 return False
573
574 if network.prefixlen > 48:
575 log.debug("Skipping network too small to be publicly announced: %s" % network)
576 return False
577
578 if str(network.network_address) == "::":
579 log.debug("Skipping network based on '::': %s" % network)
580 return False
581
582 else:
583 # This should not happen...
584 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
585 return False
586
587 # In case we have made it here, the network is considered to
588 # be suitable for libloc consumption...
589 return True
590
591 def _check_parsed_asn(self, asn):
592 """
593 Assistive function to filter out Autonomous System Numbers that are not
594 suitable for adding to our database. Returns False in such cases, and True otherwise.
595 """
596
597 for start, end in VALID_ASN_RANGES:
598 if start <= asn <= end:
599 return True
600 
601 log.info("Supplied ASN %s is out of publicly routable ASN ranges" % asn)
602 return False
603
604 def _parse_block(self, block, source_key, validcountries = None):
605 # Get first line to find out what type of block this is
606 line = block[0]
607
608 # aut-num
609 if line.startswith("aut-num:"):
610 return self._parse_autnum_block(block, source_key)
611
612 # inetnum
613 if line.startswith("inet6num:") or line.startswith("inetnum:"):
614 return self._parse_inetnum_block(block, source_key, validcountries)
615
616 # organisation
617 elif line.startswith("organisation:"):
618 return self._parse_org_block(block, source_key)
619
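# For reference, a shortened (hypothetical) aut-num block as handed to
# _parse_autnum_block() might look like:
#
#   aut-num:        AS64496
#   descr:          Example Network
#   org:            ORG-EX1-RIPE
#
# Only the "aut-num", "org" and first "descr" attributes are evaluated below.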
620 def _parse_autnum_block(self, block, source_key):
621 autnum = {}
622 for line in block:
623 # Split line
624 key, val = split_line(line)
625
626 if key == "aut-num":
627 m = re.match(r"^(AS|as)(\d+)", val)
628 if m:
629 autnum["asn"] = m.group(2)
630
631 elif key == "org":
632 autnum[key] = val.upper()
633
634 elif key == "descr":
635 # Save the first description line as well...
636 if not key in autnum:
637 autnum[key] = val
638
639 # Skip empty objects
640 if not autnum or not "asn" in autnum:
641 return
642
643 # Insert a dummy organisation handle into our temporary organisations
644 # table in case the AS does not have an organisation handle set, but
645 # has a description (a quirk often observed in APNIC area), so we can
646 # later display at least some string for this AS.
647 if not "org" in autnum:
648 if "descr" in autnum:
649 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
650
651 self.db.execute("INSERT INTO _organizations(handle, name, source) \
652 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
653 autnum.get("org"), autnum.get("descr"), source_key,
654 )
655 else:
656 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
657 autnum.get("asn"))
658 return
659
660 # Insert into database
661 self.db.execute("INSERT INTO _autnums(number, organization, source) \
662 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
663 organization = excluded.organization",
664 autnum.get("asn"), autnum.get("org"), source_key,
665 )
666
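# For reference, a shortened (hypothetical) inetnum block might look like:
#
#   inetnum:        192.0.2.0 - 192.0.2.255
#   netname:        EXAMPLE-NET
#   country:        DE
#
# while LACNIC publishes CIDR-style values such as "24.152.8/22", which is
# handled separately below.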
667 def _parse_inetnum_block(self, block, source_key, validcountries = None):
668 log.debug("Parsing inetnum block:")
669
670 inetnum = {}
671 for line in block:
672 log.debug(line)
673
674 # Split line
675 key, val = split_line(line)
676
677 # Filter any inetnum records that only refer to IP space
678 # not managed by that specific RIR...
679 if key == "netname":
680 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
681 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
682 return
683
684 if key == "inetnum":
685 start_address, delim, end_address = val.partition("-")
686
687 # Strip any excess space
688 start_address, end_address = start_address.rstrip(), end_address.strip()
689
690 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
691 if start_address and not (delim or end_address):
692 try:
693 start_address = ipaddress.ip_network(start_address, strict=False)
694 except ValueError:
695 start_address = start_address.split("/")
696 ldigits = start_address[0].count(".")
697
698 # How many octets do we need to add?
699 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
700 if ldigits == 1:
701 start_address = start_address[0] + ".0.0/" + start_address[1]
702 elif ldigits == 2:
703 start_address = start_address[0] + ".0/" + start_address[1]
704 else:
705 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
706 return
707
708 try:
709 start_address = ipaddress.ip_network(start_address, strict=False)
710 except ValueError:
711 log.warning("Could not parse line in LACNIC DB format: %s" % line)
712 return
713
714 # Enumerate first and last IP address of this network
715 end_address = start_address[-1]
716 start_address = start_address[0]
717
718 else:
719 # Convert to IP address
720 try:
721 start_address = ipaddress.ip_address(start_address)
722 end_address = ipaddress.ip_address(end_address)
723 except ValueError:
724 log.warning("Could not parse line: %s" % line)
725 return
726
727 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
728
729 elif key == "inet6num":
730 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
731
732 elif key == "country":
733 val = val.upper()
734
735 # Catch RIR data objects with more than one country code...
736 if not key in inetnum:
737 inetnum[key] = []
738 else:
739 if val in inetnum.get("country"):
740 # ... but keep this list distinct...
741 continue
742
743 # When people set country codes to "UK", they actually mean "GB"
744 if val == "UK":
745 val = "GB"
746
747 inetnum[key].append(val)
748
749 # Skip empty objects
750 if not inetnum or not "country" in inetnum:
751 return
752
753 # Prepare skipping objects with unknown country codes...
754 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
755
756 # Iterate through all networks enumerated from above, check them for plausibility and insert
757 # them into the database, if _check_parsed_network() succeeded
758 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
759 if self._check_parsed_network(single_network):
760
761 # Skip objects with unknown country codes (if we know the valid ones) to avoid log spam...
762 if validcountries and invalidcountries:
763 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
764 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
765 break
766
767 # Everything is fine here, run INSERT statement...
768 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
769 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
770 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
771 )
772
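# Organisation blocks are reduced to a handle-to-name mapping; a (hypothetical)
# block containing "organisation: ORG-EX1-RIPE" and "org-name: Example Org"
# becomes a single row in the temporary _organizations table.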
773 def _parse_org_block(self, block, source_key):
774 org = {}
775 for line in block:
776 # Split line
777 key, val = split_line(line)
778
779 if key == "organisation":
780 org[key] = val.upper()
781 elif key == "org-name":
782 org[key] = val
783
784 # Skip empty objects
785 if not org:
786 return
787
788 self.db.execute("INSERT INTO _organizations(handle, name, source) \
789 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
790 UPDATE SET name = excluded.name",
791 org.get("organisation"), org.get("org-name"), source_key,
792 )
793
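# An "extended source" line is pipe-separated, for example (illustrative):
#
#   ripencc|DE|ipv4|192.0.2.0|256|20100101|allocated|abc123
#
# i.e. registry, country code, type, start address, value, date, status and an
# optional opaque ID; for "ipv4", the value field is a number of addresses.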
794 def _parse_line(self, line, source_key, validcountries = None):
795 # Skip version line
796 if line.startswith("2"):
797 return
798
799 # Skip comments
800 if line.startswith("#"):
801 return
802
803 try:
804 registry, country_code, type, line = line.split("|", 3)
805 except:
806 log.warning("Could not parse line: %s" % line)
807 return
808
809 # Skip any lines that are for stats only or do not have a country
810 # code at all (avoids log spam below)
811 if not country_code or country_code == '*':
812 return
813
814 # Skip objects with unknown country codes
815 if validcountries and country_code not in validcountries:
816 log.warning("Skipping line with bogus country '%s': %s" % \
817 (country_code, line))
818 return
819
820 if type in ("ipv6", "ipv4"):
821 return self._parse_ip_line(country_code, type, line, source_key)
822
823 def _parse_ip_line(self, country, type, line, source_key):
824 try:
825 address, prefix, date, status, organization = line.split("|")
826 except ValueError:
827 organization = None
828
829 # Try parsing the line without organization
830 try:
831 address, prefix, date, status = line.split("|")
832 except ValueError:
833 log.warning("Unhandled line format: %s" % line)
834 return
835
836 # Skip anything that isn't properly assigned
837 if not status in ("assigned", "allocated"):
838 return
839
840 # Cast prefix into an integer
841 try:
842 prefix = int(prefix)
843 except:
844 log.warning("Invalid prefix: %s" % prefix)
845 return
846
847 # For IPv4, the prefix field holds a number of addresses; convert it into a prefix length
848 if type == "ipv4":
849 prefix = 32 - int(math.log(prefix, 2))
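# Worked example (illustrative): a line advertising 1024 IPv4 addresses
# yields 32 - log2(1024) = 22, i.e. a /22 network.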
850
851 # Try to parse the address
852 try:
853 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
854 except ValueError:
855 log.warning("Invalid IP address: %s" % address)
856 return
857
858 if not self._check_parsed_network(network):
859 return
860
861 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
862 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
863 UPDATE SET country = excluded.country",
864 "%s" % network, country, [country], source_key,
865 )
866
867 def _import_as_names_from_arin(self):
868 downloader = location.importer.Downloader()
869
870 # XXX: Download AS names file from ARIN (note that these names appear to be quite
871 # technical, not intended for human consumption, as description fields in
872 # organisation handles for other RIRs are - however, this is what we have got,
873 # and in some cases, it might be still better than nothing)
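# A data line from that file is assumed to look roughly like
#
#   " 64496   EXAMPLE-AS-NAME"
#
# i.e. leading whitespace, the AS number and a terse technical name.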
874 with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
875 for line in f:
876 # Convert binary line to string...
877 line = str(line)
878
879 # ... valid lines start with a space, followed by the number of the Autonomous System ...
880 if not line.startswith(" "):
881 continue
882
883 # Split line and check if there is a valid ASN in it...
884 asn, name = line.split()[0:2]
885
886 try:
887 asn = int(asn)
888 except ValueError:
889 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
890 continue
891
892 # Filter invalid ASNs...
893 if not self._check_parsed_asn(asn):
894 continue
895
896 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
897 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
898 continue
899
900 # Bail out in case the AS name contains anything we do not expect here...
901 if re.search(r"[^a-zA-Z0-9-_]", name):
902 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
903 (asn, name))
904
905 # Things look good here, run INSERT statement and skip this one if we already have
906 # a (better?) name for this Autonomous System...
907 self.db.execute("""
908 INSERT INTO autnums(
909 number,
910 name,
911 source
912 ) VALUES (%s, %s, %s)
913 ON CONFLICT (number) DO NOTHING""",
914 asn,
915 name,
916 "ARIN",
917 )
918
919 def handle_update_announcements(self, ns):
920 server = ns.server[0]
921
922 with self.db.transaction():
923 if server.startswith("/"):
924 self._handle_update_announcements_from_bird(server)
925 else:
926 self._handle_update_announcements_from_telnet(server)
927
928 # Purge anything we never want here
929 self.db.execute("""
930 -- Delete default routes
931 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
932
933 -- Delete anything that is not global unicast address space
934 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
935
936 -- DELETE "current network" address space
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
938
939 -- DELETE local loopback address space
940 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
941
942 -- DELETE RFC 1918 address space
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
946
947 -- DELETE test, benchmark and documentation address space
948 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
949 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
950 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
951 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
952 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
953
954 -- DELETE CGNAT address space (RFC 6598)
955 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
956
957 -- DELETE link local address space
958 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
959
960 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
961 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
962 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
963
964 -- DELETE multicast and reserved address space
965 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
966 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
967
968 -- Delete networks that are too small to be in the global routing table
969 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
970 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
971
972 -- Delete any non-public or reserved ASNs
973 DELETE FROM announcements WHERE NOT (
974 (autnum >= 1 AND autnum <= 23455)
975 OR
976 (autnum >= 23457 AND autnum <= 64495)
977 OR
978 (autnum >= 131072 AND autnum <= 4199999999)
979 );
980
981 -- Delete everything that we have not seen for 14 days
982 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
983 """)
984
985 def _handle_update_announcements_from_bird(self, server):
986 # Pre-compile the regular expression for faster searching
987 route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
988
989 log.info("Requesting routing table from Bird (%s)" % server)
990
991 aggregated_networks = []
992
993 # Send command to list all routes
994 for line in self._bird_cmd(server, "show route"):
995 m = route.match(line)
996 if not m:
997 # Skip empty lines
998 if not line:
999 pass
1000
1001 # Ignore any header lines with the name of the routing table
1002 elif line.startswith(b"Table"):
1003 pass
1004
1005 # Log anything else
1006 else:
1007 log.debug("Could not parse line: %s" % line.decode())
1008
1009 continue
1010
1011 # Fetch the extracted network and ASN
1012 network, autnum = m.groups()
1013
1014 # Decode into strings
1015 if network:
1016 network = network.decode()
1017 if autnum:
1018 autnum = autnum.decode()
1019
1020 # Collect all aggregated networks
1021 if not autnum:
1022 log.debug("%s is an aggregated network" % network)
1023 aggregated_networks.append(network)
1024 continue
1025
1026 # Insert it into the database
1027 self.db.execute("INSERT INTO announcements(network, autnum) \
1028 VALUES(%s, %s) ON CONFLICT (network) DO \
1029 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1030 network, autnum,
1031 )
1032
1033 # Process any aggregated networks
1034 for network in aggregated_networks:
1035 log.debug("Processing aggregated network %s" % network)
1036
1037 # Run "show route all" for each network
1038 for line in self._bird_cmd(server, "show route %s all" % network):
1039 # Try finding the path
1040 m = re.match(rb"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1041 if m:
1042 # Select the last AS number in the path
1043 autnum = m.group(1).decode()
1044
1045 # Insert it into the database
1046 self.db.execute("INSERT INTO announcements(network, autnum) \
1047 VALUES(%s, %s) ON CONFLICT (network) DO \
1048 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1049 network, autnum,
1050 )
1051
1052 # We don't need to process any more
1053 break
1054
1055 def _handle_update_announcements_from_telnet(self, server):
1056 # Pre-compile regular expression for routes
1057 route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
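# The expression above is meant to match routing table lines such as
# (illustrative):
#
#   *>i192.0.2.0/24    203.0.113.1   0   100   0   64496 i
#
# capturing the prefix and the last (origin) AS number before the trailing "i".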
1058
1059 with telnetlib.Telnet(server) as t:
1060 # Enable debug mode
1061 #if ns.debug:
1062 # t.set_debuglevel(10)
1063
1064 # Wait for console greeting
1065 greeting = t.read_until(b"> ", timeout=30)
1066 if not greeting:
1067 log.error("Could not get a console prompt")
1068 return 1
1069
1070 # Disable pagination
1071 t.write(b"terminal length 0\n")
1072
1073 # Wait for the prompt to return
1074 t.read_until(b"> ")
1075
1076 # Fetch the routing tables
1077 for protocol in ("ipv6", "ipv4"):
1078 log.info("Requesting %s routing table" % protocol)
1079
1080 # Request the full unicast routing table
1081 t.write(b"show bgp %s unicast\n" % protocol.encode())
1082
1083 # Read entire header which ends with "Path"
1084 t.read_until(b"Path\r\n")
1085
1086 while True:
1087 # Try reading a full entry
1088 # Those might be broken across multiple lines but ends with i
1089 line = t.read_until(b"i\r\n", timeout=5)
1090 if not line:
1091 break
1092
1093 # Show line for debugging
1094 #log.debug(repr(line))
1095
1096 # Try finding a route in here
1097 m = route.match(line)
1098 if m:
1099 network, autnum = m.groups()
1100
1101 # Convert network to string
1102 network = network.decode()
1103
1104 # Append /24 for IPv4 addresses
1105 if not "/" in network and not ":" in network:
1106 network = "%s/24" % network
1107
1108 # Convert AS number to integer
1109 autnum = int(autnum)
1110
1111 log.info("Found announcement for %s by %s" % (network, autnum))
1112
1113 self.db.execute("INSERT INTO announcements(network, autnum) \
1114 VALUES(%s, %s) ON CONFLICT (network) DO \
1115 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1116 network, autnum,
1117 )
1118
1119 log.info("Finished reading the %s routing table" % protocol)
1120
1121 def _bird_cmd(self, socket_path, command):
1122 # Connect to the socket
1123 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1124 s.connect(socket_path)
1125
1126 # Allocate some buffer
1127 buffer = b""
1128
1129 log.debug("Sending Bird command: %s" % command)
1130
1131 # Send the command
1132 s.send(b"%s\n" % command.encode())
1133
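# BIRD's control socket replies line by line: status lines carry a four-digit
# code followed by a space (final line) or "-" (continuation), e.g. the
# greeting "0001 BIRD 2.0.8 ready." or a terminating "0000", while additional
# payload lines begin with a space. The loop below relies on exactly that
# (illustrative examples, inferred from the checks further down).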
1134 while True:
1135 # Fill up the buffer
1136 buffer += s.recv(4096)
1137
1138 while True:
1139 # Search for the next newline
1140 pos = buffer.find(b"\n")
1141
1142 # If we cannot find one, we go back and read more data
1143 if pos <= 0:
1144 break
1145
1146 # Cut after the newline character
1147 pos += 1
1148
1149 # Split the line we want and keep the rest in buffer
1150 line, buffer = buffer[:pos], buffer[pos:]
1151
1152 # Try parsing any status lines
1153 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1154 code, delim, line = int(line[:4]), line[4], line[5:]
1155
1156 log.debug("Received response code %s from bird" % code)
1157
1158 # End of output
1159 if code == 0:
1160 return
1161
1162 # Ignore hello line
1163 elif code == 1:
1164 continue
1165
1166 # Otherwise return the line
1167 yield line
1168
1169 def handle_update_overrides(self, ns):
1170 with self.db.transaction():
1171 # Only drop manually created overrides, as we can be reasonably sure to have them,
1172 # and preserve the rest, which the corresponding update functions delete when appropriate.
1173 self.db.execute("""
1174 DELETE FROM autnum_overrides WHERE source = 'manual';
1175 DELETE FROM network_overrides WHERE source = 'manual';
1176 """)
1177
1178 # Update overrides for various cloud providers big enough to publish their own IP
1179 # network allocation lists in a machine-readable format...
1180 self._update_overrides_for_aws()
1181
1182 # Update overrides for Spamhaus DROP feeds...
1183 self._update_overrides_for_spamhaus_drop()
1184
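# Override files consist of RPSL-style blocks, for example (hypothetical):
#
#   net:                  192.0.2.0/24
#   country:              DE
#   is-anycast:           yes
#
#   aut-num:              AS64496
#   name:                 Example AS
#   drop:                 yes
#
# which are turned into rows of network_overrides and autnum_overrides below.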
1185 for file in ns.files:
1186 log.info("Reading %s..." % file)
1187
1188 with open(file, "rb") as f:
1189 for type, block in location.importer.read_blocks(f):
1190 if type == "net":
1191 network = block.get("net")
1192 # Try to parse and normalise the network
1193 try:
1194 network = ipaddress.ip_network(network, strict=False)
1195 except ValueError as e:
1196 log.warning("Invalid IP network: %s: %s" % (network, e))
1197 continue
1198
1199 # Prevent that we overwrite all networks
1200 if network.prefixlen == 0:
1201 log.warning("Skipping %s: You cannot overwrite default" % network)
1202 continue
1203
1204 self.db.execute("""
1205 INSERT INTO network_overrides(
1206 network,
1207 country,
1208 source,
1209 is_anonymous_proxy,
1210 is_satellite_provider,
1211 is_anycast,
1212 is_drop
1213 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1214 ON CONFLICT (network) DO NOTHING""",
1215 "%s" % network,
1216 block.get("country"),
1217 "manual",
1218 self._parse_bool(block, "is-anonymous-proxy"),
1219 self._parse_bool(block, "is-satellite-provider"),
1220 self._parse_bool(block, "is-anycast"),
1221 self._parse_bool(block, "drop"),
1222 )
1223
1224 elif type == "aut-num":
1225 autnum = block.get("aut-num")
1226
1227 # Check if AS number begins with "AS"
1228 if not autnum.startswith("AS"):
1229 log.warning("Invalid AS number: %s" % autnum)
1230 continue
1231
1232 # Strip "AS"
1233 autnum = autnum[2:]
1234
1235 self.db.execute("""
1236 INSERT INTO autnum_overrides(
1237 number,
1238 name,
1239 country,
1240 source,
1241 is_anonymous_proxy,
1242 is_satellite_provider,
1243 is_anycast,
1244 is_drop
1245 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1246 ON CONFLICT DO NOTHING""",
1247 autnum,
1248 block.get("name"),
1249 block.get("country"),
1250 "manual",
1251 self._parse_bool(block, "is-anonymous-proxy"),
1252 self._parse_bool(block, "is-satellite-provider"),
1253 self._parse_bool(block, "is-anycast"),
1254 self._parse_bool(block, "drop"),
1255 )
1256
1257 else:
1258 log.warning("Unsupported type: %s" % type)
1259
1260 def _update_overrides_for_aws(self):
1261 # Download Amazon AWS IP allocation file to create overrides...
1262 downloader = location.importer.Downloader()
1263
1264 try:
1265 with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
1266 aws_ip_dump = json.load(f.body)
1267 except Exception as e:
1268 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1269 return
1270
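# The downloaded file is JSON along the lines of (abridged, illustrative):
#
#   {"prefixes":      [{"ip_prefix": "192.0.2.0/24", "region": "eu-central-1", ...}],
#    "ipv6_prefixes": [{"ipv6_prefix": "2001:db8::/32", "region": "GLOBAL", ...}]}
#
# Only the prefix and region fields are evaluated below.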
1271 # At this point, we can assume the downloaded file to be valid
1272 self.db.execute("""
1273 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1274 """)
1275
1276 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1277 # there seems to be no machine-readable version available of this other than
1278 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1279 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1280 # was helpful here as well.
1281 aws_region_country_map = {
1282 "af-south-1": "ZA",
1283 "ap-east-1": "HK",
1284 "ap-south-1": "IN",
1285 "ap-south-2": "IN",
1286 "ap-northeast-3": "JP",
1287 "ap-northeast-2": "KR",
1288 "ap-southeast-1": "SG",
1289 "ap-southeast-2": "AU",
1290 "ap-southeast-3": "MY",
1291 "ap-southeast-4": "AU",
1292 "ap-northeast-1": "JP",
1293 "ca-central-1": "CA",
1294 "eu-central-1": "DE",
1295 "eu-central-2": "CH",
1296 "eu-west-1": "IE",
1297 "eu-west-2": "GB",
1298 "eu-south-1": "IT",
1299 "eu-south-2": "ES",
1300 "eu-west-3": "FR",
1301 "eu-north-1": "SE",
1302 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
1303 "me-central-1": "AE",
1304 "me-south-1": "BH",
1305 "sa-east-1": "BR"
1306 }
1307
1308 # Fetch all valid country codes to check parsed networks against...
1309 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1310 validcountries = []
1311
1312 for row in rows:
1313 validcountries.append(row.country_code)
1314
1315 with self.db.transaction():
1316 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1317 try:
1318 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1319 except ValueError:
1320 log.warning("Unable to parse line: %s" % snetwork)
1321 continue
1322
1323 # Sanitize parsed networks...
1324 if not self._check_parsed_network(network):
1325 continue
1326
1327 # Determine region of this network...
1328 region = snetwork["region"]
1329 cc = None
1330 is_anycast = False
1331
1332 # Any region name starting with "us-" will get "US" country code assigned straight away...
1333 if region.startswith("us-"):
1334 cc = "US"
1335 elif region.startswith("cn-"):
1336 # ... same goes for China ...
1337 cc = "CN"
1338 elif region == "GLOBAL":
1339 # ... funny region name for anycast-like networks ...
1340 is_anycast = True
1341 elif region in aws_region_country_map:
1342 # ... assign looked up country code otherwise ...
1343 cc = aws_region_country_map[region]
1344 else:
1345 # ... and bail out if we are missing something here
1346 log.warning("Unable to determine country code for line: %s" % snetwork)
1347 continue
1348
1349 # Skip networks with unknown country codes
1350 if not is_anycast and validcountries and cc not in validcountries:
1351 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1352 (cc, network))
1353 continue
1354
1355 # Conduct SQL statement...
1356 self.db.execute("""
1357 INSERT INTO network_overrides(
1358 network,
1359 country,
1360 source,
1361 is_anonymous_proxy,
1362 is_satellite_provider,
1363 is_anycast
1364 ) VALUES (%s, %s, %s, %s, %s, %s)
1365 ON CONFLICT (network) DO NOTHING""",
1366 "%s" % network,
1367 cc,
1368 "Amazon AWS IP feed",
1369 None,
1370 None,
1371 is_anycast,
1372 )
1373
1374
1375 def _update_overrides_for_spamhaus_drop(self):
1376 downloader = location.importer.Downloader()
1377
1378 ip_urls = [
1379 "https://www.spamhaus.org/drop/drop.txt",
1380 "https://www.spamhaus.org/drop/edrop.txt",
1381 "https://www.spamhaus.org/drop/dropv6.txt"
1382 ]
1383
1384 asn_urls = [
1385 "https://www.spamhaus.org/drop/asndrop.txt"
1386 ]
1387
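# Both kinds of feeds are plain text with ";" introducing comments; entries
# are assumed to look like
#
#   192.0.2.0/24 ; SBL123456        (DROP/EDROP/DROPv6)
#   AS64496 ; EXAMPLE-AS            (ASN-DROP)
#
# which is what the parsing below expects.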
1388 for url in ip_urls:
1389 try:
1390 with downloader.request(url, return_blocks=False) as f:
1391 fcontent = f.body.readlines()
1392 except Exception as e:
1393 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1394 return
1395
1396 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1397 # downloads.
1398 if len(fcontent) > 10:
1399 self.db.execute("""
1400 DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
1401 DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
1402 """)
1403 else:
1404 log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
1405 continue
1406
1407 # Iterate through every line, filter comments and add remaining networks to
1408 # the override table in case they are valid...
1409 with self.db.transaction():
1410 for sline in fcontent:
1411
1412 # The response is assumed to be encoded in UTF-8...
1413 sline = sline.decode("utf-8")
1414
1415 # Comments start with a semicolon...
1416 if sline.startswith(";"):
1417 continue
1418
1419 # Extract network and ignore anything afterwards...
1420 try:
1421 network = ipaddress.ip_network(sline.split()[0], strict=False)
1422 except ValueError:
1423 log.error("Unable to parse line: %s" % sline)
1424 continue
1425
1426 # Sanitize parsed networks...
1427 if not self._check_parsed_network(network):
1428 log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1429 (url, network))
1430 continue
1431
1432 # Conduct SQL statement...
1433 self.db.execute("""
1434 INSERT INTO network_overrides(
1435 network,
1436 source,
1437 is_drop
1438 ) VALUES (%s, %s, %s)
1439 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1440 "%s" % network,
1441 "Spamhaus DROP lists",
1442 True
1443 )
1444
1445 for url in asn_urls:
1446 try:
1447 with downloader.request(url, return_blocks=False) as f:
1448 fcontent = f.body.readlines()
1449 except Exception as e:
1450 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1451 return
1452
1453 # Iterate through every line, filter comments and add remaining ASNs to
1454 # the override table in case they are valid...
1455 with self.db.transaction():
1456 for sline in fcontent:
1457
1458 # The response is assumed to be encoded in UTF-8...
1459 sline = sline.decode("utf-8")
1460
1461 # Comments start with a semicolon...
1462 if sline.startswith(";"):
1463 continue
1464
1465 # Throw away anything after the first space...
1466 sline = sline.split()[0]
1467
1468 # ... strip the "AS" prefix from it ...
1469 sline = sline.strip("AS")
1470
1471 # ... and convert it into an integer. Voila.
1472 asn = int(sline)
1473
1474 # Filter invalid ASNs...
1475 if not self._check_parsed_asn(asn):
1476 log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1477 (url, asn))
1478 continue
1479
1480 # Conduct SQL statement...
1481 self.db.execute("""
1482 INSERT INTO autnum_overrides(
1483 number,
1484 source,
1485 is_drop
1486 ) VALUES (%s, %s, %s)
1487 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1488 "%s" % asn,
1489 "Spamhaus ASN-DROP list",
1490 True
1491 )
1492
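# Maps "yes"/"1" to True, "no"/"0" to False, and anything else (including a
# missing key) to None, e.g. _parse_bool({"drop": "yes"}, "drop") returns True.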
1493 @staticmethod
1494 def _parse_bool(block, key):
1495 val = block.get(key)
1496
1497 # There is no point to proceed when we got None
1498 if val is None:
1499 return
1500
1501 # Convert to lowercase
1502 val = val.lower()
1503
1504 # True
1505 if val in ("yes", "1"):
1506 return True
1507
1508 # False
1509 if val in ("no", "0"):
1510 return False
1511
1512 # Default to None
1513 return None
1514
1515 def handle_import_countries(self, ns):
1516 with self.db.transaction():
1517 # Drop all data that we have
1518 self.db.execute("TRUNCATE TABLE countries")
1519
1520 for file in ns.file:
1521 for line in file:
1522 line = line.rstrip()
1523
1524 # Ignore any comments
1525 if line.startswith("#"):
1526 continue
1527
1528 try:
1529 country_code, continent_code, name = line.split(maxsplit=2)
1530 except:
1531 log.warning("Could not parse line: %s" % line)
1532 continue
1533
1534 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1535 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1536
1537
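# Split a single "key: value" attribute line, e.g. (illustrative)
# split_line("country:  DE ") returns ("country", "DE").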
1538 def split_line(line):
1539 key, colon, val = line.partition(":")
1540
1541 # Strip any excess space
1542 key = key.strip()
1543 val = val.strip()
1544
1545 return key, val
1546
1547 def main():
1548 # Run the command line interface
1549 c = CLI()
1550 c.run()
1551
1552 main()