]> git.ipfire.org Git - people/ms/libloc.git/blob - src/scripts/location-importer.in
importer: Move importing extended sources/ARIN into transaction
[people/ms/libloc.git] / src / scripts / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import json
23 import logging
24 import math
25 import re
26 import socket
27 import sys
28 import telnetlib
29
30 # Load our location module
31 import location
32 import location.database
33 import location.importer
34 from location.i18n import _
35
36 # Initialise logging
37 log = logging.getLogger("location.importer")
38 log.propagate = 1
39
40 # Define constants
41 VALID_ASN_RANGES = (
42 (1, 23455),
43 (23457, 64495),
44 (131072, 4199999999),
45 )
46
47
48 class CLI(object):
49 def parse_cli(self):
50 parser = argparse.ArgumentParser(
51 description=_("Location Importer Command Line Interface"),
52 )
53 subparsers = parser.add_subparsers()
54
55 # Global configuration flags
56 parser.add_argument("--debug", action="store_true",
57 help=_("Enable debug output"))
58 parser.add_argument("--quiet", action="store_true",
59 help=_("Enable quiet mode"))
60
61 # version
62 parser.add_argument("--version", action="version",
63 version="%(prog)s @VERSION@")
64
65 # Database
66 parser.add_argument("--database-host", required=True,
67 help=_("Database Hostname"), metavar=_("HOST"))
68 parser.add_argument("--database-name", required=True,
69 help=_("Database Name"), metavar=_("NAME"))
70 parser.add_argument("--database-username", required=True,
71 help=_("Database Username"), metavar=_("USERNAME"))
72 parser.add_argument("--database-password", required=True,
73 help=_("Database Password"), metavar=_("PASSWORD"))
74
75 # Write Database
76 write = subparsers.add_parser("write", help=_("Write database to file"))
77 write.set_defaults(func=self.handle_write)
78 write.add_argument("file", nargs=1, help=_("Database File"))
79 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
80 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
81 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
82 write.add_argument("--description", nargs="?", help=_("Sets a description"))
83 write.add_argument("--license", nargs="?", help=_("Sets the license"))
84 write.add_argument("--version", type=int, help=_("Database Format Version"))
85
86 # Update WHOIS
87 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois.set_defaults(func=self.handle_update_whois)
89
90 # Update announcements
91 update_announcements = subparsers.add_parser("update-announcements",
92 help=_("Update BGP Annoucements"))
93 update_announcements.set_defaults(func=self.handle_update_announcements)
94 update_announcements.add_argument("server", nargs=1,
95 help=_("Route Server to connect to"), metavar=_("SERVER"))
96
97 # Update overrides
98 update_overrides = subparsers.add_parser("update-overrides",
99 help=_("Update overrides"),
100 )
101 update_overrides.add_argument(
102 "files", nargs="+", help=_("Files to import"),
103 )
104 update_overrides.set_defaults(func=self.handle_update_overrides)
105
106 # Import countries
107 import_countries = subparsers.add_parser("import-countries",
108 help=_("Import countries"),
109 )
110 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
111 help=_("File to import"))
112 import_countries.set_defaults(func=self.handle_import_countries)
113
114 args = parser.parse_args()
115
116 # Configure logging
117 if args.debug:
118 location.logger.set_level(logging.DEBUG)
119 elif args.quiet:
120 location.logger.set_level(logging.WARNING)
121
122 # Print usage if no action was given
123 if not "func" in args:
124 parser.print_usage()
125 sys.exit(2)
126
127 return args
128
129 def run(self):
130 # Parse command line arguments
131 args = self.parse_cli()
132
133 # Initialise database
134 self.db = self._setup_database(args)
135
136 # Call function
137 ret = args.func(args)
138
139 # Return with exit code
140 if ret:
141 sys.exit(ret)
142
143 # Otherwise just exit
144 sys.exit(0)
145
146 def _setup_database(self, ns):
147 """
148 Initialise the database
149 """
150 # Connect to database
151 db = location.database.Connection(
152 host=ns.database_host, database=ns.database_name,
153 user=ns.database_username, password=ns.database_password,
154 )
155
156 with db.transaction():
157 db.execute("""
158 -- announcements
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
165
166 -- autnums
167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
170
171 -- countries
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
175
176 -- networks
177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
183
184 -- overrides
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
187 name text,
188 country text,
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
191 is_anycast boolean
192 );
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
197
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
200 country text,
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
203 is_anycast boolean
204 );
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
211 """)
212
213 return db
214
215 def handle_write(self, ns):
216 """
217 Compiles a database in libloc format out of what is in the database
218 """
219 # Allocate a writer
220 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
221
222 # Set all metadata
223 if ns.vendor:
224 writer.vendor = ns.vendor
225
226 if ns.description:
227 writer.description = ns.description
228
229 if ns.license:
230 writer.license = ns.license
231
232 # Add all Autonomous Systems
233 log.info("Writing Autonomous Systems...")
234
235 # Select all ASes with a name
236 rows = self.db.query("""
237 SELECT
238 autnums.number AS number,
239 COALESCE(
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
242 autnums.name
243 ) AS name
244 FROM autnums
245 WHERE name <> %s ORDER BY number
246 """, "")
247
248 for row in rows:
249 a = writer.add_as(row.number)
250 a.name = row.name
251
252 # Add all networks
253 log.info("Writing networks...")
254
255 # Select all known networks
256 rows = self.db.query("""
257 WITH known_networks AS (
258 SELECT network FROM announcements
259 UNION
260 SELECT network FROM networks
261 UNION
262 SELECT network FROM network_overrides
263 ),
264
265 ordered_networks AS (
266 SELECT
267 known_networks.network AS network,
268 announcements.autnum AS autnum,
269 networks.country AS country,
270
271 -- Must be part of returned values for ORDER BY clause
272 masklen(announcements.network) AS sort_a,
273 masklen(networks.network) AS sort_b
274 FROM
275 known_networks
276 LEFT JOIN
277 announcements ON known_networks.network <<= announcements.network
278 LEFT JOIN
279 networks ON known_networks.network <<= networks.network
280 ORDER BY
281 known_networks.network,
282 sort_a DESC,
283 sort_b DESC
284 )
285
286 -- Return a list of those networks enriched with all
287 -- other information that we store in the database
288 SELECT
289 DISTINCT ON (network)
290 network,
291 autnum,
292
293 -- Country
294 COALESCE(
295 (
296 SELECT country FROM network_overrides overrides
297 WHERE networks.network <<= overrides.network
298 ORDER BY masklen(overrides.network) DESC
299 LIMIT 1
300 ),
301 (
302 SELECT country FROM autnum_overrides overrides
303 WHERE networks.autnum = overrides.number
304 ),
305 networks.country
306 ) AS country,
307
308 -- Flags
309 COALESCE(
310 (
311 SELECT is_anonymous_proxy FROM network_overrides overrides
312 WHERE networks.network <<= overrides.network
313 ORDER BY masklen(overrides.network) DESC
314 LIMIT 1
315 ),
316 (
317 SELECT is_anonymous_proxy FROM autnum_overrides overrides
318 WHERE networks.autnum = overrides.number
319 ),
320 FALSE
321 ) AS is_anonymous_proxy,
322 COALESCE(
323 (
324 SELECT is_satellite_provider FROM network_overrides overrides
325 WHERE networks.network <<= overrides.network
326 ORDER BY masklen(overrides.network) DESC
327 LIMIT 1
328 ),
329 (
330 SELECT is_satellite_provider FROM autnum_overrides overrides
331 WHERE networks.autnum = overrides.number
332 ),
333 FALSE
334 ) AS is_satellite_provider,
335 COALESCE(
336 (
337 SELECT is_anycast FROM network_overrides overrides
338 WHERE networks.network <<= overrides.network
339 ORDER BY masklen(overrides.network) DESC
340 LIMIT 1
341 ),
342 (
343 SELECT is_anycast FROM autnum_overrides overrides
344 WHERE networks.autnum = overrides.number
345 ),
346 FALSE
347 ) AS is_anycast,
348 COALESCE(
349 (
350 SELECT is_drop FROM network_overrides overrides
351 WHERE networks.network <<= overrides.network
352 ORDER BY masklen(overrides.network) DESC
353 LIMIT 1
354 ),
355 (
356 SELECT is_drop FROM autnum_overrides overrides
357 WHERE networks.autnum = overrides.number
358 ),
359 FALSE
360 ) AS is_drop
361 FROM
362 ordered_networks networks
363 """)
364
365 for row in rows:
366 network = writer.add_network(row.network)
367
368 # Save country
369 if row.country:
370 network.country_code = row.country
371
372 # Save ASN
373 if row.autnum:
374 network.asn = row.autnum
375
376 # Set flags
377 if row.is_anonymous_proxy:
378 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
379
380 if row.is_satellite_provider:
381 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
382
383 if row.is_anycast:
384 network.set_flag(location.NETWORK_FLAG_ANYCAST)
385
386 if row.is_drop:
387 network.set_flag(location.NETWORK_FLAG_DROP)
388
389 # Add all countries
390 log.info("Writing countries...")
391 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
392
393 for row in rows:
394 c = writer.add_country(row.country_code)
395 c.continent_code = row.continent_code
396 c.name = row.name
397
398 # Write everything to file
399 log.info("Writing database to file...")
400 for file in ns.file:
401 writer.write(file)
402
403 def handle_update_whois(self, ns):
404 downloader = location.importer.Downloader()
405
406 # Download all sources
407 with self.db.transaction():
408 # Create some temporary tables to store parsed data
409 self.db.execute("""
410 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
411 ON COMMIT DROP;
412 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
413
414 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
415 ON COMMIT DROP;
416 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
417
418 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
419 ON COMMIT DROP;
420 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
421 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
422 """)
423
424 # Remove all previously imported content
425 self.db.execute("""
426 TRUNCATE TABLE networks;
427 """)
428
429 # Fetch all valid country codes to check parsed networks aganist...
430 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
431 validcountries = []
432
433 for row in rows:
434 validcountries.append(row.country_code)
435
436 for source_key in location.importer.WHOIS_SOURCES:
437 for single_url in location.importer.WHOIS_SOURCES[source_key]:
438 for block in downloader.request_blocks(single_url):
439 self._parse_block(block, source_key, validcountries)
440
441 # Process all parsed networks from every RIR we happen to have access to,
442 # insert the largest network chunks into the networks table immediately...
443 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
444
445 for family in (row.family for row in families):
446 smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
447
448 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
449 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
450
451 # ... determine any other prefixes for this network family, ...
452 prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
453 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
454
455 # ... and insert networks with this prefix in case they provide additional
456 # information (i. e. subnet of a larger chunk with a different country)
457 for prefix in (row.prefix for row in prefixes):
458 self.db.execute("""
459 WITH candidates AS (
460 SELECT
461 _rirdata.network,
462 _rirdata.country,
463 _rirdata.original_countries,
464 _rirdata.source
465 FROM
466 _rirdata
467 WHERE
468 family(_rirdata.network) = %s
469 AND
470 masklen(_rirdata.network) = %s
471 ),
472 filtered AS (
473 SELECT
474 DISTINCT ON (c.network)
475 c.network,
476 c.country,
477 c.original_countries,
478 c.source,
479 masklen(networks.network),
480 networks.country AS parent_country
481 FROM
482 candidates c
483 LEFT JOIN
484 networks
485 ON
486 c.network << networks.network
487 ORDER BY
488 c.network,
489 masklen(networks.network) DESC NULLS LAST
490 )
491 INSERT INTO
492 networks(network, country, original_countries, source)
493 SELECT
494 network,
495 country,
496 original_countries,
497 source
498 FROM
499 filtered
500 WHERE
501 parent_country IS NULL
502 OR
503 country <> parent_country
504 ON CONFLICT DO NOTHING""",
505 family, prefix,
506 )
507
508 self.db.execute("""
509 INSERT INTO autnums(number, name, source)
510 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
511 JOIN _organizations ON _autnums.organization = _organizations.handle
512 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
513 """)
514
515 # Download all extended sources
516 for source_key in location.importer.EXTENDED_SOURCES:
517 for single_url in location.importer.EXTENDED_SOURCES[source_key]:
518 # Download data
519 for line in downloader.request_lines(single_url):
520 self._parse_line(line, source_key, validcountries)
521
522 # Download and import (technical) AS names from ARIN
523 self._import_as_names_from_arin()
524
525 def _check_parsed_network(self, network):
526 """
527 Assistive function to detect and subsequently sort out parsed
528 networks from RIR data (both Whois and so-called "extended sources"),
529 which are or have...
530
531 (a) not globally routable (RFC 1918 space, et al.)
532 (b) covering a too large chunk of the IP address space (prefix length
533 is < 7 for IPv4 networks, and < 10 for IPv6)
534 (c) "0.0.0.0" or "::" as a network address
535 (d) are too small for being publicly announced (we have decided not to
536 process them at the moment, as they significantly enlarge our
537 database without providing very helpful additional information)
538
539 This unfortunately is necessary due to brain-dead clutter across
540 various RIR databases, causing mismatches and eventually disruptions.
541
542 We will return False in case a network is not suitable for adding
543 it to our database, and True otherwise.
544 """
545
546 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
547 return False
548
549 if not network.is_global:
550 log.debug("Skipping non-globally routable network: %s" % network)
551 return False
552
553 if network.version == 4:
554 if network.prefixlen < 7:
555 log.debug("Skipping too big IP chunk: %s" % network)
556 return False
557
558 if network.prefixlen > 24:
559 log.debug("Skipping network too small to be publicly announced: %s" % network)
560 return False
561
562 if str(network.network_address) == "0.0.0.0":
563 log.debug("Skipping network based on 0.0.0.0: %s" % network)
564 return False
565
566 elif network.version == 6:
567 if network.prefixlen < 10:
568 log.debug("Skipping too big IP chunk: %s" % network)
569 return False
570
571 if network.prefixlen > 48:
572 log.debug("Skipping network too small to be publicly announced: %s" % network)
573 return False
574
575 if str(network.network_address) == "::":
576 log.debug("Skipping network based on '::': %s" % network)
577 return False
578
579 else:
580 # This should not happen...
581 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
582 return False
583
584 # In case we have made it here, the network is considered to
585 # be suitable for libloc consumption...
586 return True
587
588 def _check_parsed_asn(self, asn):
589 """
590 Assistive function to filter Autonomous System Numbers not being suitable
591 for adding to our database. Returns False in such cases, and True otherwise.
592 """
593
594 for start, end in VALID_ASN_RANGES:
595 if start <= asn and end >= asn:
596 return True
597
598 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
599 return False
600
601 def _parse_block(self, block, source_key, validcountries = None):
602 # Get first line to find out what type of block this is
603 line = block[0]
604
605 # aut-num
606 if line.startswith("aut-num:"):
607 return self._parse_autnum_block(block, source_key)
608
609 # inetnum
610 if line.startswith("inet6num:") or line.startswith("inetnum:"):
611 return self._parse_inetnum_block(block, source_key, validcountries)
612
613 # organisation
614 elif line.startswith("organisation:"):
615 return self._parse_org_block(block, source_key)
616
617 def _parse_autnum_block(self, block, source_key):
618 autnum = {}
619 for line in block:
620 # Split line
621 key, val = split_line(line)
622
623 if key == "aut-num":
624 m = re.match(r"^(AS|as)(\d+)", val)
625 if m:
626 autnum["asn"] = m.group(2)
627
628 elif key == "org":
629 autnum[key] = val.upper()
630
631 elif key == "descr":
632 # Save the first description line as well...
633 if not key in autnum:
634 autnum[key] = val
635
636 # Skip empty objects
637 if not autnum or not "asn" in autnum:
638 return
639
640 # Insert a dummy organisation handle into our temporary organisations
641 # table in case the AS does not have an organisation handle set, but
642 # has a description (a quirk often observed in APNIC area), so we can
643 # later display at least some string for this AS.
644 if not "org" in autnum:
645 if "descr" in autnum:
646 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
647
648 self.db.execute("INSERT INTO _organizations(handle, name, source) \
649 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
650 autnum.get("org"), autnum.get("descr"), source_key,
651 )
652 else:
653 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
654 autnum.get("asn"))
655 return
656
657 # Insert into database
658 self.db.execute("INSERT INTO _autnums(number, organization, source) \
659 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
660 organization = excluded.organization",
661 autnum.get("asn"), autnum.get("org"), source_key,
662 )
663
664 def _parse_inetnum_block(self, block, source_key, validcountries = None):
665 log.debug("Parsing inetnum block:")
666
667 inetnum = {}
668 for line in block:
669 log.debug(line)
670
671 # Split line
672 key, val = split_line(line)
673
674 # Filter any inetnum records which are only referring to IP space
675 # not managed by that specific RIR...
676 if key == "netname":
677 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
678 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
679 return
680
681 if key == "inetnum":
682 start_address, delim, end_address = val.partition("-")
683
684 # Strip any excess space
685 start_address, end_address = start_address.rstrip(), end_address.strip()
686
687 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
688 if start_address and not (delim or end_address):
689 try:
690 start_address = ipaddress.ip_network(start_address, strict=False)
691 except ValueError:
692 start_address = start_address.split("/")
693 ldigits = start_address[0].count(".")
694
695 # How many octets do we need to add?
696 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
697 if ldigits == 1:
698 start_address = start_address[0] + ".0.0/" + start_address[1]
699 elif ldigits == 2:
700 start_address = start_address[0] + ".0/" + start_address[1]
701 else:
702 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
703 return
704
705 try:
706 start_address = ipaddress.ip_network(start_address, strict=False)
707 except ValueError:
708 log.warning("Could not parse line in LACNIC DB format: %s" % line)
709 return
710
711 # Enumerate first and last IP address of this network
712 end_address = start_address[-1]
713 start_address = start_address[0]
714
715 else:
716 # Convert to IP address
717 try:
718 start_address = ipaddress.ip_address(start_address)
719 end_address = ipaddress.ip_address(end_address)
720 except ValueError:
721 log.warning("Could not parse line: %s" % line)
722 return
723
724 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
725
726 elif key == "inet6num":
727 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
728
729 elif key == "country":
730 val = val.upper()
731
732 # Catch RIR data objects with more than one country code...
733 if not key in inetnum:
734 inetnum[key] = []
735 else:
736 if val in inetnum.get("country"):
737 # ... but keep this list distinct...
738 continue
739
740 # When people set country codes to "UK", they actually mean "GB"
741 if val == "UK":
742 val = "GB"
743
744 inetnum[key].append(val)
745
746 # Skip empty objects
747 if not inetnum or not "country" in inetnum:
748 return
749
750 # Prepare skipping objects with unknown country codes...
751 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
752
753 # Iterate through all networks enumerated from above, check them for plausibility and insert
754 # them into the database, if _check_parsed_network() succeeded
755 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
756 if self._check_parsed_network(single_network):
757
758 # Skip objects with unknown country codes if they are valid to avoid log spam...
759 if validcountries and invalidcountries:
760 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
761 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
762 break
763
764 # Everything is fine here, run INSERT statement...
765 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
766 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
767 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
768 )
769
770 def _parse_org_block(self, block, source_key):
771 org = {}
772 for line in block:
773 # Split line
774 key, val = split_line(line)
775
776 if key == "organisation":
777 org[key] = val.upper()
778 elif key == "org-name":
779 org[key] = val
780
781 # Skip empty objects
782 if not org:
783 return
784
785 self.db.execute("INSERT INTO _organizations(handle, name, source) \
786 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
787 UPDATE SET name = excluded.name",
788 org.get("organisation"), org.get("org-name"), source_key,
789 )
790
791 def _parse_line(self, line, source_key, validcountries = None):
792 # Skip version line
793 if line.startswith("2"):
794 return
795
796 # Skip comments
797 if line.startswith("#"):
798 return
799
800 try:
801 registry, country_code, type, line = line.split("|", 3)
802 except:
803 log.warning("Could not parse line: %s" % line)
804 return
805
806 # Skip any lines that are for stats only or do not have a country
807 # code at all (avoids log spam below)
808 if not country_code or country_code == '*':
809 return
810
811 # Skip objects with unknown country codes
812 if validcountries and country_code not in validcountries:
813 log.warning("Skipping line with bogus country '%s': %s" % \
814 (country_code, line))
815 return
816
817 if type in ("ipv6", "ipv4"):
818 return self._parse_ip_line(country_code, type, line, source_key)
819
820 def _parse_ip_line(self, country, type, line, source_key):
821 try:
822 address, prefix, date, status, organization = line.split("|")
823 except ValueError:
824 organization = None
825
826 # Try parsing the line without organization
827 try:
828 address, prefix, date, status = line.split("|")
829 except ValueError:
830 log.warning("Unhandled line format: %s" % line)
831 return
832
833 # Skip anything that isn't properly assigned
834 if not status in ("assigned", "allocated"):
835 return
836
837 # Cast prefix into an integer
838 try:
839 prefix = int(prefix)
840 except:
841 log.warning("Invalid prefix: %s" % prefix)
842 return
843
844 # Fix prefix length for IPv4
845 if type == "ipv4":
846 prefix = 32 - int(math.log(prefix, 2))
847
848 # Try to parse the address
849 try:
850 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
851 except ValueError:
852 log.warning("Invalid IP address: %s" % address)
853 return
854
855 if not self._check_parsed_network(network):
856 return
857
858 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
859 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
860 UPDATE SET country = excluded.country",
861 "%s" % network, country, [country], source_key,
862 )
863
864 def _import_as_names_from_arin(self):
865 downloader = location.importer.Downloader()
866
867 # XXX: Download AS names file from ARIN (note that these names appear to be quite
868 # technical, not intended for human consumption, as description fields in
869 # organisation handles for other RIRs are - however, this is what we have got,
870 # and in some cases, it might be still better than nothing)
871 for line in downloader.request_lines("https://ftp.arin.net/info/asn.txt"):
872 # Valid lines start with a space, followed by the number of the Autonomous System ...
873 if not line.startswith(" "):
874 continue
875
876 # Split line and check if there is a valid ASN in it...
877 asn, name = line.split()[0:2]
878
879 try:
880 asn = int(asn)
881 except ValueError:
882 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
883 continue
884
885 # Filter invalid ASNs...
886 if not self._check_parsed_asn(asn):
887 continue
888
889 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
890 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
891 continue
892
893 # Bail out in case the AS name contains anything we do not expect here...
894 if re.search(r"[^a-zA-Z0-9-_]", name):
895 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
896 (asn, name))
897
898 # Things look good here, run INSERT statement and skip this one if we already have
899 # a (better?) name for this Autonomous System...
900 self.db.execute("""
901 INSERT INTO autnums(
902 number,
903 name,
904 source
905 ) VALUES (%s, %s, %s)
906 ON CONFLICT (number) DO NOTHING""",
907 asn,
908 name,
909 "ARIN",
910 )
911
912 def handle_update_announcements(self, ns):
913 server = ns.server[0]
914
915 with self.db.transaction():
916 if server.startswith("/"):
917 self._handle_update_announcements_from_bird(server)
918 else:
919 self._handle_update_announcements_from_telnet(server)
920
921 # Purge anything we never want here
922 self.db.execute("""
923 -- Delete default routes
924 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
925
926 -- Delete anything that is not global unicast address space
927 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
928
929 -- DELETE "current network" address space
930 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
931
932 -- DELETE local loopback address space
933 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
934
935 -- DELETE RFC 1918 address space
936 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
938 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
939
940 -- DELETE test, benchmark and documentation address space
941 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
942 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
946
947 -- DELETE CGNAT address space (RFC 6598)
948 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
949
950 -- DELETE link local address space
951 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
952
953 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
954 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
955 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
956
957 -- DELETE multicast and reserved address space
958 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
959 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
960
961 -- Delete networks that are too small to be in the global routing table
962 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
963 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
964
965 -- Delete any non-public or reserved ASNs
966 DELETE FROM announcements WHERE NOT (
967 (autnum >= 1 AND autnum <= 23455)
968 OR
969 (autnum >= 23457 AND autnum <= 64495)
970 OR
971 (autnum >= 131072 AND autnum <= 4199999999)
972 );
973
974 -- Delete everything that we have not seen for 14 days
975 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
976 """)
977
978 def _handle_update_announcements_from_bird(self, server):
979 # Pre-compile the regular expression for faster searching
980 route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
981
982 log.info("Requesting routing table from Bird (%s)" % server)
983
984 aggregated_networks = []
985
986 # Send command to list all routes
987 for line in self._bird_cmd(server, "show route"):
988 m = route.match(line)
989 if not m:
990 # Skip empty lines
991 if not line:
992 pass
993
994 # Ignore any header lines with the name of the routing table
995 elif line.startswith(b"Table"):
996 pass
997
998 # Log anything else
999 else:
1000 log.debug("Could not parse line: %s" % line.decode())
1001
1002 continue
1003
1004 # Fetch the extracted network and ASN
1005 network, autnum = m.groups()
1006
1007 # Decode into strings
1008 if network:
1009 network = network.decode()
1010 if autnum:
1011 autnum = autnum.decode()
1012
1013 # Collect all aggregated networks
1014 if not autnum:
1015 log.debug("%s is an aggregated network" % network)
1016 aggregated_networks.append(network)
1017 continue
1018
1019 # Insert it into the database
1020 self.db.execute("INSERT INTO announcements(network, autnum) \
1021 VALUES(%s, %s) ON CONFLICT (network) DO \
1022 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1023 network, autnum,
1024 )
1025
1026 # Process any aggregated networks
1027 for network in aggregated_networks:
1028 log.debug("Processing aggregated network %s" % network)
1029
1030 # Run "show route all" for each network
1031 for line in self._bird_cmd(server, "show route %s all" % network):
1032 # Try finding the path
1033 m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1034 if m:
1035 # Select the last AS number in the path
1036 autnum = m.group(1).decode()
1037
1038 # Insert it into the database
1039 self.db.execute("INSERT INTO announcements(network, autnum) \
1040 VALUES(%s, %s) ON CONFLICT (network) DO \
1041 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1042 network, autnum,
1043 )
1044
1045 # We don't need to process any more
1046 break
1047
1048 def _handle_update_announcements_from_telnet(self, server):
1049 # Pre-compile regular expression for routes
1050 route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
1051
1052 with telnetlib.Telnet(server) as t:
1053 # Enable debug mode
1054 #if ns.debug:
1055 # t.set_debuglevel(10)
1056
1057 # Wait for console greeting
1058 greeting = t.read_until(b"> ", timeout=30)
1059 if not greeting:
1060 log.error("Could not get a console prompt")
1061 return 1
1062
1063 # Disable pagination
1064 t.write(b"terminal length 0\n")
1065
1066 # Wait for the prompt to return
1067 t.read_until(b"> ")
1068
1069 # Fetch the routing tables
1070 for protocol in ("ipv6", "ipv4"):
1071 log.info("Requesting %s routing table" % protocol)
1072
1073 # Request the full unicast routing table
1074 t.write(b"show bgp %s unicast\n" % protocol.encode())
1075
1076 # Read entire header which ends with "Path"
1077 t.read_until(b"Path\r\n")
1078
1079 while True:
1080 # Try reading a full entry
1081 # Those might be broken across multiple lines but ends with i
1082 line = t.read_until(b"i\r\n", timeout=5)
1083 if not line:
1084 break
1085
1086 # Show line for debugging
1087 #log.debug(repr(line))
1088
1089 # Try finding a route in here
1090 m = route.match(line)
1091 if m:
1092 network, autnum = m.groups()
1093
1094 # Convert network to string
1095 network = network.decode()
1096
1097 # Append /24 for IPv4 addresses
1098 if not "/" in network and not ":" in network:
1099 network = "%s/24" % network
1100
1101 # Convert AS number to integer
1102 autnum = int(autnum)
1103
1104 log.info("Found announcement for %s by %s" % (network, autnum))
1105
1106 self.db.execute("INSERT INTO announcements(network, autnum) \
1107 VALUES(%s, %s) ON CONFLICT (network) DO \
1108 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1109 network, autnum,
1110 )
1111
1112 log.info("Finished reading the %s routing table" % protocol)
1113
1114 def _bird_cmd(self, socket_path, command):
1115 # Connect to the socket
1116 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1117 s.connect(socket_path)
1118
1119 # Allocate some buffer
1120 buffer = b""
1121
1122 log.debug("Sending Bird command: %s" % command)
1123
1124 # Send the command
1125 s.send(b"%s\n" % command.encode())
1126
1127 while True:
1128 # Fill up the buffer
1129 buffer += s.recv(4096)
1130
1131 while True:
1132 # Search for the next newline
1133 pos = buffer.find(b"\n")
1134
1135 # If we cannot find one, we go back and read more data
1136 if pos <= 0:
1137 break
1138
1139 # Cut after the newline character
1140 pos += 1
1141
1142 # Split the line we want and keep the rest in buffer
1143 line, buffer = buffer[:pos], buffer[pos:]
1144
1145 # Try parsing any status lines
1146 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1147 code, delim, line = int(line[:4]), line[4], line[5:]
1148
1149 log.debug("Received response code %s from bird" % code)
1150
1151 # End of output
1152 if code == 0:
1153 return
1154
1155 # Ignore hello line
1156 elif code == 1:
1157 continue
1158
1159 # Otherwise return the line
1160 yield line
1161
1162 def handle_update_overrides(self, ns):
1163 with self.db.transaction():
1164 # Only drop manually created overrides, as we can be reasonably sure to have them,
1165 # and preserve the rest. If appropriate, it is deleted by correspondent functions.
1166 self.db.execute("""
1167 DELETE FROM autnum_overrides WHERE source = 'manual';
1168 DELETE FROM network_overrides WHERE source = 'manual';
1169 """)
1170
1171 # Update overrides for various cloud providers big enough to publish their own IP
1172 # network allocation lists in a machine-readable format...
1173 self._update_overrides_for_aws()
1174
1175 # Update overrides for Spamhaus DROP feeds...
1176 self._update_overrides_for_spamhaus_drop()
1177
1178 for file in ns.files:
1179 log.info("Reading %s..." % file)
1180
1181 with open(file, "rb") as f:
1182 for type, block in location.importer.read_blocks(f):
1183 if type == "net":
1184 network = block.get("net")
1185 # Try to parse and normalise the network
1186 try:
1187 network = ipaddress.ip_network(network, strict=False)
1188 except ValueError as e:
1189 log.warning("Invalid IP network: %s: %s" % (network, e))
1190 continue
1191
1192 # Prevent that we overwrite all networks
1193 if network.prefixlen == 0:
1194 log.warning("Skipping %s: You cannot overwrite default" % network)
1195 continue
1196
1197 self.db.execute("""
1198 INSERT INTO network_overrides(
1199 network,
1200 country,
1201 source,
1202 is_anonymous_proxy,
1203 is_satellite_provider,
1204 is_anycast,
1205 is_drop
1206 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1207 ON CONFLICT (network) DO NOTHING""",
1208 "%s" % network,
1209 block.get("country"),
1210 "manual",
1211 self._parse_bool(block, "is-anonymous-proxy"),
1212 self._parse_bool(block, "is-satellite-provider"),
1213 self._parse_bool(block, "is-anycast"),
1214 self._parse_bool(block, "drop"),
1215 )
1216
1217 elif type == "aut-num":
1218 autnum = block.get("aut-num")
1219
1220 # Check if AS number begins with "AS"
1221 if not autnum.startswith("AS"):
1222 log.warning("Invalid AS number: %s" % autnum)
1223 continue
1224
1225 # Strip "AS"
1226 autnum = autnum[2:]
1227
1228 self.db.execute("""
1229 INSERT INTO autnum_overrides(
1230 number,
1231 name,
1232 country,
1233 source,
1234 is_anonymous_proxy,
1235 is_satellite_provider,
1236 is_anycast,
1237 is_drop
1238 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1239 ON CONFLICT DO NOTHING""",
1240 autnum,
1241 block.get("name"),
1242 block.get("country"),
1243 "manual",
1244 self._parse_bool(block, "is-anonymous-proxy"),
1245 self._parse_bool(block, "is-satellite-provider"),
1246 self._parse_bool(block, "is-anycast"),
1247 self._parse_bool(block, "drop"),
1248 )
1249
1250 else:
1251 log.warning("Unsupported type: %s" % type)
1252
1253 def _update_overrides_for_aws(self):
1254 # Download Amazon AWS IP allocation file to create overrides...
1255 downloader = location.importer.Downloader()
1256
1257 try:
1258 # Fetch IP ranges
1259 f = downloader.retrieve("https://ip-ranges.amazonaws.com/ip-ranges.json")
1260
1261 # Parse downloaded file
1262 aws_ip_dump = json.load(f)
1263 except Exception as e:
1264 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1265 return
1266
1267 # At this point, we can assume the downloaded file to be valid
1268 self.db.execute("""
1269 DELETE FROM network_overrides WHERE source = 'Amazon AWS IP feed';
1270 """)
1271
1272 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1273 # there seems to be no machine-readable version available of this other than
1274 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1275 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1276 # was helpful here as well.
1277 aws_region_country_map = {
1278 "af-south-1": "ZA",
1279 "ap-east-1": "HK",
1280 "ap-south-1": "IN",
1281 "ap-south-2": "IN",
1282 "ap-northeast-3": "JP",
1283 "ap-northeast-2": "KR",
1284 "ap-southeast-1": "SG",
1285 "ap-southeast-2": "AU",
1286 "ap-southeast-3": "MY",
1287 "ap-southeast-4": "AU",
1288 "ap-northeast-1": "JP",
1289 "ca-central-1": "CA",
1290 "eu-central-1": "DE",
1291 "eu-central-2": "CH",
1292 "eu-west-1": "IE",
1293 "eu-west-2": "GB",
1294 "eu-south-1": "IT",
1295 "eu-south-2": "ES",
1296 "eu-west-3": "FR",
1297 "eu-north-1": "SE",
1298 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
1299 "me-central-1": "AE",
1300 "me-south-1": "BH",
1301 "sa-east-1": "BR"
1302 }
1303
1304 # Fetch all valid country codes to check parsed networks aganist...
1305 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1306 validcountries = []
1307
1308 for row in rows:
1309 validcountries.append(row.country_code)
1310
1311 with self.db.transaction():
1312 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1313 try:
1314 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1315 except ValueError:
1316 log.warning("Unable to parse line: %s" % snetwork)
1317 continue
1318
1319 # Sanitize parsed networks...
1320 if not self._check_parsed_network(network):
1321 continue
1322
1323 # Determine region of this network...
1324 region = snetwork["region"]
1325 cc = None
1326 is_anycast = False
1327
1328 # Any region name starting with "us-" will get "US" country code assigned straight away...
1329 if region.startswith("us-"):
1330 cc = "US"
1331 elif region.startswith("cn-"):
1332 # ... same goes for China ...
1333 cc = "CN"
1334 elif region == "GLOBAL":
1335 # ... funny region name for anycast-like networks ...
1336 is_anycast = True
1337 elif region in aws_region_country_map:
1338 # ... assign looked up country code otherwise ...
1339 cc = aws_region_country_map[region]
1340 else:
1341 # ... and bail out if we are missing something here
1342 log.warning("Unable to determine country code for line: %s" % snetwork)
1343 continue
1344
1345 # Skip networks with unknown country codes
1346 if not is_anycast and validcountries and cc not in validcountries:
1347 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1348 (cc, network))
1349 return
1350
1351 # Conduct SQL statement...
1352 self.db.execute("""
1353 INSERT INTO network_overrides(
1354 network,
1355 country,
1356 source,
1357 is_anonymous_proxy,
1358 is_satellite_provider,
1359 is_anycast
1360 ) VALUES (%s, %s, %s, %s, %s, %s)
1361 ON CONFLICT (network) DO NOTHING""",
1362 "%s" % network,
1363 cc,
1364 "Amazon AWS IP feed",
1365 None,
1366 None,
1367 is_anycast,
1368 )
1369
1370
1371 def _update_overrides_for_spamhaus_drop(self):
1372 downloader = location.importer.Downloader()
1373
1374 ip_urls = [
1375 "https://www.spamhaus.org/drop/drop.txt",
1376 "https://www.spamhaus.org/drop/edrop.txt",
1377 "https://www.spamhaus.org/drop/dropv6.txt"
1378 ]
1379
1380 asn_urls = [
1381 "https://www.spamhaus.org/drop/asndrop.txt"
1382 ]
1383
1384 for url in ip_urls:
1385 # Fetch IP list
1386 f = downloader.retrieve(url)
1387
1388 # Split into lines
1389 fcontent = f.readlines()
1390
1391 # Conduct a very basic sanity check to rule out CDN issues causing bogus DROP
1392 # downloads.
1393 if len(fcontent) > 10:
1394 self.db.execute("""
1395 DELETE FROM autnum_overrides WHERE source = 'Spamhaus ASN-DROP list';
1396 DELETE FROM network_overrides WHERE source = 'Spamhaus DROP lists';
1397 """)
1398 else:
1399 log.error("Spamhaus DROP URL %s returned likely bogus file, ignored" % url)
1400 continue
1401
1402 # Iterate through every line, filter comments and add remaining networks to
1403 # the override table in case they are valid...
1404 with self.db.transaction():
1405 for sline in fcontent:
1406 # The response is assumed to be encoded in UTF-8...
1407 sline = sline.decode("utf-8")
1408
1409 # Comments start with a semicolon...
1410 if sline.startswith(";"):
1411 continue
1412
1413 # Extract network and ignore anything afterwards...
1414 try:
1415 network = ipaddress.ip_network(sline.split()[0], strict=False)
1416 except ValueError:
1417 log.error("Unable to parse line: %s" % sline)
1418 continue
1419
1420 # Sanitize parsed networks...
1421 if not self._check_parsed_network(network):
1422 log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1423 (url, network))
1424 continue
1425
1426 # Conduct SQL statement...
1427 self.db.execute("""
1428 INSERT INTO network_overrides(
1429 network,
1430 source,
1431 is_drop
1432 ) VALUES (%s, %s, %s)
1433 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1434 "%s" % network,
1435 "Spamhaus DROP lists",
1436 True
1437 )
1438
1439 for url in asn_urls:
1440 # Fetch URL
1441 f = downloader.retrieve(url)
1442
1443 # Iterate through every line, filter comments and add remaining ASNs to
1444 # the override table in case they are valid...
1445 with self.db.transaction():
1446 for sline in t.readlines():
1447 # The response is assumed to be encoded in UTF-8...
1448 sline = sline.decode("utf-8")
1449
1450 # Comments start with a semicolon...
1451 if sline.startswith(";"):
1452 continue
1453
1454 # Throw away anything after the first space...
1455 sline = sline.split()[0]
1456
1457 # ... strip the "AS" prefix from it ...
1458 sline = sline.strip("AS")
1459
1460 # ... and convert it into an integer. Voila.
1461 asn = int(sline)
1462
1463 # Filter invalid ASNs...
1464 if not self._check_parsed_asn(asn):
1465 log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1466 (url, asn))
1467 continue
1468
1469 # Conduct SQL statement...
1470 self.db.execute("""
1471 INSERT INTO autnum_overrides(
1472 number,
1473 source,
1474 is_drop
1475 ) VALUES (%s, %s, %s)
1476 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1477 "%s" % asn,
1478 "Spamhaus ASN-DROP list",
1479 True
1480 )
1481
1482 @staticmethod
1483 def _parse_bool(block, key):
1484 val = block.get(key)
1485
1486 # There is no point to proceed when we got None
1487 if val is None:
1488 return
1489
1490 # Convert to lowercase
1491 val = val.lower()
1492
1493 # True
1494 if val in ("yes", "1"):
1495 return True
1496
1497 # False
1498 if val in ("no", "0"):
1499 return False
1500
1501 # Default to None
1502 return None
1503
1504 def handle_import_countries(self, ns):
1505 with self.db.transaction():
1506 # Drop all data that we have
1507 self.db.execute("TRUNCATE TABLE countries")
1508
1509 for file in ns.file:
1510 for line in file:
1511 line = line.rstrip()
1512
1513 # Ignore any comments
1514 if line.startswith("#"):
1515 continue
1516
1517 try:
1518 country_code, continent_code, name = line.split(maxsplit=2)
1519 except:
1520 log.warning("Could not parse line: %s" % line)
1521 continue
1522
1523 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1524 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1525
1526
1527 def split_line(line):
1528 key, colon, val = line.partition(":")
1529
1530 # Strip any excess space
1531 key = key.strip()
1532 val = val.strip()
1533
1534 return key, val
1535
1536 def main():
1537 # Run the command line interface
1538 c = CLI()
1539 c.run()
1540
1541 main()