1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import json
23 import logging
24 import math
25 import re
26 import socket
27 import sys
28 import telnetlib
29
30 # Load our location module
31 import location
32 import location.database
33 import location.importer
34 from location.i18n import _
35
36 # Initialise logging
37 log = logging.getLogger("location.importer")
38 log.propagate = 1
39
40 # Define constants
41 VALID_ASN_RANGES = (
42 (1, 23455),
43 (23457, 64495),
44 (131072, 4199999999),
45 )
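# (Illustrative note, not part of the original source: these ranges exclude AS 23456,
# i.e. AS_TRANS, the reserved/private/documentation blocks 64496-131071, and everything
# from 4200000000 upwards, which is private use or reserved for 32-bit ASNs.)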
46
47
48 class CLI(object):
49 def parse_cli(self):
50 parser = argparse.ArgumentParser(
51 description=_("Location Importer Command Line Interface"),
52 )
53 subparsers = parser.add_subparsers()
54
55 # Global configuration flags
56 parser.add_argument("--debug", action="store_true",
57 help=_("Enable debug output"))
58 parser.add_argument("--quiet", action="store_true",
59 help=_("Enable quiet mode"))
60
61 # version
62 parser.add_argument("--version", action="version",
63 version="%(prog)s @VERSION@")
64
65 # Database
66 parser.add_argument("--database-host", required=True,
67 help=_("Database Hostname"), metavar=_("HOST"))
68 parser.add_argument("--database-name", required=True,
69 help=_("Database Name"), metavar=_("NAME"))
70 parser.add_argument("--database-username", required=True,
71 help=_("Database Username"), metavar=_("USERNAME"))
72 parser.add_argument("--database-password", required=True,
73 help=_("Database Password"), metavar=_("PASSWORD"))
74
75 # Write Database
76 write = subparsers.add_parser("write", help=_("Write database to file"))
77 write.set_defaults(func=self.handle_write)
78 write.add_argument("file", nargs=1, help=_("Database File"))
79 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
80 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
81 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
82 write.add_argument("--description", nargs="?", help=_("Sets a description"))
83 write.add_argument("--license", nargs="?", help=_("Sets the license"))
84 write.add_argument("--version", type=int, help=_("Database Format Version"))
85
86 # Update WHOIS
87 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois.set_defaults(func=self.handle_update_whois)
89
90 # Update announcements
91 update_announcements = subparsers.add_parser("update-announcements",
92 help=_("Update BGP Announcements"))
93 update_announcements.set_defaults(func=self.handle_update_announcements)
94 update_announcements.add_argument("server", nargs=1,
95 help=_("Route Server to connect to"), metavar=_("SERVER"))
96
97 # Update overrides
98 update_overrides = subparsers.add_parser("update-overrides",
99 help=_("Update overrides"),
100 )
101 update_overrides.add_argument(
102 "files", nargs="+", help=_("Files to import"),
103 )
104 update_overrides.set_defaults(func=self.handle_update_overrides)
105
106 # Import countries
107 import_countries = subparsers.add_parser("import-countries",
108 help=_("Import countries"),
109 )
110 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
111 help=_("File to import"))
112 import_countries.set_defaults(func=self.handle_import_countries)
113
114 args = parser.parse_args()
115
116 # Configure logging
117 if args.debug:
118 location.logger.set_level(logging.DEBUG)
119 elif args.quiet:
120 location.logger.set_level(logging.WARNING)
121
122 # Print usage if no action was given
123 if not "func" in args:
124 parser.print_usage()
125 sys.exit(2)
126
127 return args
128
129 def run(self):
130 # Parse command line arguments
131 args = self.parse_cli()
132
133 # Initialise database
134 self.db = self._setup_database(args)
135
136 # Call function
137 ret = args.func(args)
138
139 # Return with exit code
140 if ret:
141 sys.exit(ret)
142
143 # Otherwise just exit
144 sys.exit(0)
145
146 def _setup_database(self, ns):
147 """
148 Initialise the database
149 """
150 # Connect to database
151 db = location.database.Connection(
152 host=ns.database_host, database=ns.database_name,
153 user=ns.database_username, password=ns.database_password,
154 )
155
156 with db.transaction():
157 db.execute("""
158 -- announcements
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
165
166 -- autnums
167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
170
171 -- countries
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
175
176 -- networks
177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
183
184 -- overrides
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
187 name text,
188 country text,
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
191 is_anycast boolean
192 );
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
197
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
200 country text,
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
203 is_anycast boolean
204 );
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
211 """)
212
213 return db
214
215 def handle_write(self, ns):
216 """
217 Compiles a database in libloc format out of what is in the database
218 """
219 # Allocate a writer
220 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
221
222 # Set all metadata
223 if ns.vendor:
224 writer.vendor = ns.vendor
225
226 if ns.description:
227 writer.description = ns.description
228
229 if ns.license:
230 writer.license = ns.license
231
232 # Add all Autonomous Systems
233 log.info("Writing Autonomous Systems...")
234
235 # Select all ASes with a name
236 rows = self.db.query("""
237 SELECT
238 autnums.number AS number,
239 COALESCE(
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
242 autnums.name
243 ) AS name
244 FROM autnums
245 WHERE name <> %s ORDER BY number
246 """, "")
247
248 for row in rows:
249 a = writer.add_as(row.number)
250 a.name = row.name
251
252 # Add all networks
253 log.info("Writing networks...")
254
255 # Select all known networks
256 rows = self.db.query("""
257 -- Return a list of those networks enriched with all
258 -- other information that we store in the database
259 SELECT
260 DISTINCT ON (network)
261 network,
262 autnum,
263
264 -- Country
265 COALESCE(
266 (
267 SELECT country FROM network_overrides overrides
268 WHERE networks.network <<= overrides.network
269 ORDER BY masklen(overrides.network) DESC
270 LIMIT 1
271 ),
272 (
273 SELECT country FROM autnum_overrides overrides
274 WHERE networks.autnum = overrides.number
275 ),
276 networks.country
277 ) AS country,
278
279 -- Flags
280 COALESCE(
281 (
282 SELECT is_anonymous_proxy FROM network_overrides overrides
283 WHERE networks.network <<= overrides.network
284 ORDER BY masklen(overrides.network) DESC
285 LIMIT 1
286 ),
287 (
288 SELECT is_anonymous_proxy FROM autnum_overrides overrides
289 WHERE networks.autnum = overrides.number
290 ),
291 FALSE
292 ) AS is_anonymous_proxy,
293 COALESCE(
294 (
295 SELECT is_satellite_provider FROM network_overrides overrides
296 WHERE networks.network <<= overrides.network
297 ORDER BY masklen(overrides.network) DESC
298 LIMIT 1
299 ),
300 (
301 SELECT is_satellite_provider FROM autnum_overrides overrides
302 WHERE networks.autnum = overrides.number
303 ),
304 FALSE
305 ) AS is_satellite_provider,
306 COALESCE(
307 (
308 SELECT is_anycast FROM network_overrides overrides
309 WHERE networks.network <<= overrides.network
310 ORDER BY masklen(overrides.network) DESC
311 LIMIT 1
312 ),
313 (
314 SELECT is_anycast FROM autnum_overrides overrides
315 WHERE networks.autnum = overrides.number
316 ),
317 FALSE
318 ) AS is_anycast,
319 COALESCE(
320 (
321 SELECT is_drop FROM network_overrides overrides
322 WHERE networks.network <<= overrides.network
323 ORDER BY masklen(overrides.network) DESC
324 LIMIT 1
325 ),
326 (
327 SELECT is_drop FROM autnum_overrides overrides
328 WHERE networks.autnum = overrides.number
329 ),
330 FALSE
331 ) AS is_drop
332 FROM (
333 SELECT
334 known_networks.network AS network,
335 announcements.autnum AS autnum,
336 networks.country AS country,
337
338 -- Must be part of returned values for ORDER BY clause
339 masklen(announcements.network) AS sort_a,
340 masklen(networks.network) AS sort_b
341 FROM (
342 SELECT network FROM announcements
343 UNION ALL
344 SELECT network FROM networks
345 UNION ALL
346 SELECT network FROM network_overrides
347 ) known_networks
348 LEFT JOIN
349 announcements ON known_networks.network <<= announcements.network
350 LEFT JOIN
351 networks ON known_networks.network <<= networks.network
352 ORDER BY
353 known_networks.network,
354 sort_a DESC,
355 sort_b DESC
356 ) networks
357 """)
358
359 for row in rows:
360 network = writer.add_network(row.network)
361
362 # Save country
363 if row.country:
364 network.country_code = row.country
365
366 # Save ASN
367 if row.autnum:
368 network.asn = row.autnum
369
370 # Set flags
371 if row.is_anonymous_proxy:
372 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
373
374 if row.is_satellite_provider:
375 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
376
377 if row.is_anycast:
378 network.set_flag(location.NETWORK_FLAG_ANYCAST)
379
380 if row.is_drop:
381 network.set_flag(location.NETWORK_FLAG_DROP)
382
383 # Add all countries
384 log.info("Writing countries...")
385 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
386
387 for row in rows:
388 c = writer.add_country(row.country_code)
389 c.continent_code = row.continent_code
390 c.name = row.name
391
392 # Write everything to file
393 log.info("Writing database to file...")
394 for file in ns.file:
395 writer.write(file)
396
397 def handle_update_whois(self, ns):
398 downloader = location.importer.Downloader()
399
400 # Download all sources
401 with self.db.transaction():
402 # Create some temporary tables to store parsed data
403 self.db.execute("""
404 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
405 ON COMMIT DROP;
406 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
407
408 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
409 ON COMMIT DROP;
410 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
411
412 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
413 ON COMMIT DROP;
414 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
415 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
416 """)
417
418 # Remove all previously imported content
419 self.db.execute("""
420 TRUNCATE TABLE networks;
421 """)
422
423 # Fetch all valid country codes to check parsed networks against...
424 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
425 validcountries = []
426
427 for row in rows:
428 validcountries.append(row.country_code)
429
430 for source_key in location.importer.WHOIS_SOURCES:
431 for single_url in location.importer.WHOIS_SOURCES[source_key]:
432 with downloader.request(single_url, return_blocks=True) as f:
433 for block in f:
434 self._parse_block(block, source_key, validcountries)
435
436 # Process all parsed networks from every RIR we happen to have access to,
437 # insert the largest network chunks into the networks table immediately...
438 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
439
440 for family in (row.family for row in families):
441 smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
442
443 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
444 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
445
446 # ... determine any other prefixes for this network family, ...
447 prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
448 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
449
450 # ... and insert networks with this prefix in case they provide additional
451 # information (i. e. subnet of a larger chunk with a different country)
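# (Illustrative example with made-up data: if 81.0.0.0/8 -> "DE" has already been
# inserted from a shorter prefix, a more specific 81.3.0.0/16 -> "DE" adds nothing
# and is filtered out below, whereas 81.4.0.0/16 -> "AT" differs from its parent
# and therefore gets inserted.)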
452 for prefix in (row.prefix for row in prefixes):
453 self.db.execute("""
454 WITH candidates AS (
455 SELECT
456 _rirdata.network,
457 _rirdata.country,
458 _rirdata.original_countries,
459 _rirdata.source
460 FROM
461 _rirdata
462 WHERE
463 family(_rirdata.network) = %s
464 AND
465 masklen(_rirdata.network) = %s
466 ),
467 filtered AS (
468 SELECT
469 DISTINCT ON (c.network)
470 c.network,
471 c.country,
472 c.original_countries,
473 c.source,
474 masklen(networks.network),
475 networks.country AS parent_country
476 FROM
477 candidates c
478 LEFT JOIN
479 networks
480 ON
481 c.network << networks.network
482 ORDER BY
483 c.network,
484 masklen(networks.network) DESC NULLS LAST
485 )
486 INSERT INTO
487 networks(network, country, original_countries, source)
488 SELECT
489 network,
490 country,
491 original_countries,
492 source
493 FROM
494 filtered
495 WHERE
496 parent_country IS NULL
497 OR
498 country <> parent_country
499 ON CONFLICT DO NOTHING""",
500 family, prefix,
501 )
502
503 self.db.execute("""
504 INSERT INTO autnums(number, name, source)
505 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
506 JOIN _organizations ON _autnums.organization = _organizations.handle
507 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
508 """)
509
510 # Download all extended sources
511 for source_key in location.importer.EXTENDED_SOURCES:
512 for single_url in location.importer.EXTENDED_SOURCES[source_key]:
513 with self.db.transaction():
514 # Download data
515 with downloader.request(single_url) as f:
516 for line in f:
517 self._parse_line(line, source_key, validcountries)
518
519 # Download and import (technical) AS names from ARIN
520 self._import_as_names_from_arin()
521
522 def _check_parsed_network(self, network):
523 """
524 Assistive function to detect and subsequently sort out parsed
525 networks from RIR data (both Whois and so-called "extended sources")
526 which...
527
528 (a) are not globally routable (RFC 1918 space, et al.),
529 (b) cover too large a chunk of the IP address space (prefix length
530 is < 7 for IPv4 networks, and < 10 for IPv6),
531 (c) have "0.0.0.0" or "::" as their network address, or
532 (d) are too small to be publicly announced (we have decided not to
533 process them at the moment, as they significantly enlarge our
534 database without providing very helpful additional information).
535
536 This unfortunately is necessary due to brain-dead clutter across
537 various RIR databases, causing mismatches and eventually disruptions.
538
539 We return False in case a network is not suitable for adding
540 to our database, and True otherwise.
541 """
542
543 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
544 return False
545
546 if not network.is_global:
547 log.debug("Skipping non-globally routable network: %s" % network)
548 return False
549
550 if network.version == 4:
551 if network.prefixlen < 7:
552 log.debug("Skipping too big IP chunk: %s" % network)
553 return False
554
555 if network.prefixlen > 24:
556 log.debug("Skipping network too small to be publicly announced: %s" % network)
557 return False
558
559 if str(network.network_address) == "0.0.0.0":
560 log.debug("Skipping network based on 0.0.0.0: %s" % network)
561 return False
562
563 elif network.version == 6:
564 if network.prefixlen < 10:
565 log.debug("Skipping too big IP chunk: %s" % network)
566 return False
567
568 if network.prefixlen > 48:
569 log.debug("Skipping network too small to be publicly announced: %s" % network)
570 return False
571
572 if str(network.network_address) == "::":
573 log.debug("Skipping network based on '::': %s" % network)
574 return False
575
576 else:
577 # This should not happen...
578 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
579 return False
580
581 # In case we have made it here, the network is considered to
582 # be suitable for libloc consumption...
583 return True
584
585 def _check_parsed_asn(self, asn):
586 """
587 Assistive function to filter out Autonomous System Numbers that are not
588 suitable for adding to our database. Returns False in such cases, and True otherwise.
589 """
590
591 for start, end in VALID_ASN_RANGES:
592 if start <= asn <= end:
593 return True
594
595 log.info("Supplied ASN %s is out of publicly routable ASN ranges" % asn)
596 return False
597
598 def _parse_block(self, block, source_key, validcountries = None):
599 # Get first line to find out what type of block this is
600 line = block[0]
601
602 # aut-num
603 if line.startswith("aut-num:"):
604 return self._parse_autnum_block(block, source_key)
605
606 # inetnum
607 if line.startswith("inet6num:") or line.startswith("inetnum:"):
608 return self._parse_inetnum_block(block, source_key, validcountries)
609
610 # organisation
611 elif line.startswith("organisation:"):
612 return self._parse_org_block(block, source_key)
613
614 def _parse_autnum_block(self, block, source_key):
615 autnum = {}
616 for line in block:
617 # Split line
618 key, val = split_line(line)
619
620 if key == "aut-num":
621 m = re.match(r"^(AS|as)(\d+)", val)
622 if m:
623 autnum["asn"] = m.group(2)
624
625 elif key == "org":
626 autnum[key] = val.upper()
627
628 elif key == "descr":
629 # Save the first description line as well...
630 if not key in autnum:
631 autnum[key] = val
632
633 # Skip empty objects
634 if not autnum or not "asn" in autnum:
635 return
636
637 # Insert a dummy organisation handle into our temporary organisations
638 # table in case the AS does not have an organisation handle set, but
639 # has a description (a quirk often observed in the APNIC area), so we can
640 # later display at least some string for this AS.
641 if not "org" in autnum:
642 if "descr" in autnum:
643 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
644
645 self.db.execute("INSERT INTO _organizations(handle, name, source) \
646 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
647 autnum.get("org"), autnum.get("descr"), source_key,
648 )
649 else:
650 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
651 autnum.get("asn"))
652 return
653
654 # Insert into database
655 self.db.execute("INSERT INTO _autnums(number, organization, source) \
656 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
657 organization = excluded.organization",
658 autnum.get("asn"), autnum.get("org"), source_key,
659 )
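# (Illustrative example with hypothetical data: a WHOIS block such as
#
#   aut-num:    AS64496
#   org:        ORG-EXAMPLE1-TEST
#   descr:      Example Networks
#
# ends up as _autnums(number=64496, organization="ORG-EXAMPLE1-TEST"); had no "org:"
# attribute been present, a dummy organisation handle named after the "descr:" line
# would have been inserted instead.)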
660
661 def _parse_inetnum_block(self, block, source_key, validcountries = None):
662 log.debug("Parsing inetnum block:")
663
664 inetnum = {}
665 for line in block:
666 log.debug(line)
667
668 # Split line
669 key, val = split_line(line)
670
671 # Filter any inetnum records which are only referring to IP space
672 # not managed by that specific RIR...
673 if key == "netname":
674 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
675 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
676 return
677
678 if key == "inetnum":
679 start_address, delim, end_address = val.partition("-")
680
681 # Strip any excess space
682 start_address, end_address = start_address.rstrip(), end_address.strip()
683
684 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
685 if start_address and not (delim or end_address):
686 try:
687 start_address = ipaddress.ip_network(start_address, strict=False)
688 except ValueError:
689 start_address = start_address.split("/")
690 ldigits = start_address[0].count(".")
691
692 # How many octets do we need to add?
693 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
694 if ldigits == 1:
695 start_address = start_address[0] + ".0.0/" + start_address[1]
696 elif ldigits == 2:
697 start_address = start_address[0] + ".0/" + start_address[1]
698 else:
699 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
700 return
701
702 try:
703 start_address = ipaddress.ip_network(start_address, strict=False)
704 except ValueError:
705 log.warning("Could not parse line in LACNIC DB format: %s" % line)
706 return
707
708 # Enumerate first and last IP address of this network
709 end_address = start_address[-1]
710 start_address = start_address[0]
711
712 else:
713 # Convert to IP address
714 try:
715 start_address = ipaddress.ip_address(start_address)
716 end_address = ipaddress.ip_address(end_address)
717 except ValueError:
718 log.warning("Could not parse line: %s" % line)
719 return
720
721 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
722
723 elif key == "inet6num":
724 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
725
726 elif key == "country":
727 val = val.upper()
728
729 # Catch RIR data objects with more than one country code...
730 if not key in inetnum:
731 inetnum[key] = []
732 else:
733 if val in inetnum.get("country"):
734 # ... but keep this list distinct...
735 continue
736
737 # When people set country codes to "UK", they actually mean "GB"
738 if val == "UK":
739 val = "GB"
740
741 inetnum[key].append(val)
742
743 # Skip empty objects
744 if not inetnum or not "country" in inetnum:
745 return
746
747 # Prepare skipping objects with unknown country codes...
748 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
749
750 # Iterate through all networks enumerated from above, check them for plausibility and insert
751 # them into the database, if _check_parsed_network() succeeded
752 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
753 if self._check_parsed_network(single_network):
754
755 # Skip objects with unknown country codes - provided we know the valid ones - to avoid log spam below...
756 if validcountries and invalidcountries:
757 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
758 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
759 break
760
761 # Everything is fine here, run INSERT statement...
762 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
763 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
764 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
765 )
766
767 def _parse_org_block(self, block, source_key):
768 org = {}
769 for line in block:
770 # Split line
771 key, val = split_line(line)
772
773 if key == "organisation":
774 org[key] = val.upper()
775 elif key == "org-name":
776 org[key] = val
777
778 # Skip empty objects
779 if not org:
780 return
781
782 self.db.execute("INSERT INTO _organizations(handle, name, source) \
783 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
784 UPDATE SET name = excluded.name",
785 org.get("organisation"), org.get("org-name"), source_key,
786 )
787
788 def _parse_line(self, line, source_key, validcountries = None):
789 # Skip version line
790 if line.startswith("2"):
791 return
792
793 # Skip comments
794 if line.startswith("#"):
795 return
796
797 try:
798 registry, country_code, type, line = line.split("|", 3)
799 except:
800 log.warning("Could not parse line: %s" % line)
801 return
802
803 # Skip any lines that are for stats only or do not have a country
804 # code at all (avoids log spam below)
805 if not country_code or country_code == '*':
806 return
807
808 # Skip objects with unknown country codes
809 if validcountries and country_code not in validcountries:
810 log.warning("Skipping line with bogus country '%s': %s" % \
811 (country_code, line))
812 return
813
814 if type in ("ipv6", "ipv4"):
815 return self._parse_ip_line(country_code, type, line, source_key)
816
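# (For reference, an extended delegated stats line is expected to look roughly like
# "ripencc|NL|ipv4|2.56.0.0|1024|20180927|allocated|<opaque-id>" - registry, country
# code and type are split off above, the remainder is handled below.)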
817 def _parse_ip_line(self, country, type, line, source_key):
818 try:
819 address, prefix, date, status, organization = line.split("|")
820 except ValueError:
821 organization = None
822
823 # Try parsing the line without organization
824 try:
825 address, prefix, date, status = line.split("|")
826 except ValueError:
827 log.warning("Unhandled line format: %s" % line)
828 return
829
830 # Skip anything that isn't properly assigned
831 if not status in ("assigned", "allocated"):
832 return
833
834 # Cast prefix into an integer
835 try:
836 prefix = int(prefix)
837 except:
838 log.warning("Invalid prefix: %s" % prefix)
839 return
840
841 # Fix prefix length for IPv4
842 if type == "ipv4":
843 prefix = 32 - int(math.log(prefix, 2))
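# (e.g. a value of 1024 addresses becomes a /22, since 32 - log2(1024) = 22)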
844
845 # Try to parse the address
846 try:
847 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
848 except ValueError:
849 log.warning("Invalid IP address: %s" % address)
850 return
851
852 if not self._check_parsed_network(network):
853 return
854
855 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
856 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
857 UPDATE SET country = excluded.country",
858 "%s" % network, country, [country], source_key,
859 )
860
861 def _import_as_names_from_arin(self):
862 downloader = location.importer.Downloader()
863
864 # XXX: Download the AS names file from ARIN (note that these names appear to be quite
865 # technical and not intended for human consumption, unlike the description fields in
866 # organisation handles for other RIRs - however, this is what we have got, and in
867 # some cases, it might still be better than nothing)
868 with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
869 for line in f:
870 # Convert binary line to string...
871 line = str(line)
872
873 # ... valid lines start with a space, followed by the number of the Autonomous System ...
874 if not line.startswith(" "):
875 continue
876
877 # Split line and check if there is a valid ASN in it...
878 asn, name = line.split()[0:2]
879
880 try:
881 asn = int(asn)
882 except ValueError:
883 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
884 continue
885
886 # Filter invalid ASNs...
887 if not self._check_parsed_asn(asn):
888 continue
889
890 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
891 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
892 continue
893
894 # Bail out in case the AS name contains anything we do not expect here...
895 if re.search(r"[^a-zA-Z0-9-_]", name):
896 log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
897 (asn, name))
898 continue

899 # Things look good here, run INSERT statement and skip this one if we already have
900 # a (better?) name for this Autonomous System...
901 self.db.execute("""
902 INSERT INTO autnums(
903 number,
904 name,
905 source
906 ) VALUES (%s, %s, %s)
907 ON CONFLICT (number) DO NOTHING""",
908 asn,
909 name,
910 "ARIN",
911 )
912
913 def handle_update_announcements(self, ns):
914 server = ns.server[0]
915
916 with self.db.transaction():
917 if server.startswith("/"):
918 self._handle_update_announcements_from_bird(server)
919 else:
920 self._handle_update_announcements_from_telnet(server)
921
922 # Purge anything we never want here
923 self.db.execute("""
924 -- Delete default routes
925 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
926
927 -- Delete anything that is not global unicast address space
928 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
929
930 -- DELETE "current network" address space
931 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
932
933 -- DELETE local loopback address space
934 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
935
936 -- DELETE RFC 1918 address space
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
938 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
939 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
940
941 -- DELETE test, benchmark and documentation address space
942 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
946 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
947
948 -- DELETE CGNAT address space (RFC 6598)
949 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
950
951 -- DELETE link local address space
952 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
953
954 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
955 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
956 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
957
958 -- DELETE multicast and reserved address space
959 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
960 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
961
962 -- Delete networks that are too small to be in the global routing table
963 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
964 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
965
966 -- Delete any non-public or reserved ASNs
967 DELETE FROM announcements WHERE NOT (
968 (autnum >= 1 AND autnum <= 23455)
969 OR
970 (autnum >= 23457 AND autnum <= 64495)
971 OR
972 (autnum >= 131072 AND autnum <= 4199999999)
973 );
974
975 -- Delete everything that we have not seen for 14 days
976 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
977 """)
978
979 def _handle_update_announcements_from_bird(self, server):
980 # Pre-compile the regular expression for faster searching
981 route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
982
983 log.info("Requesting routing table from Bird (%s)" % server)
984
985 aggregated_networks = []
986
987 # Send command to list all routes
988 for line in self._bird_cmd(server, "show route"):
989 m = route.match(line)
990 if not m:
991 # Skip empty lines
992 if not line:
993 pass
994
995 # Ignore any header lines with the name of the routing table
996 elif line.startswith(b"Table"):
997 pass
998
999 # Log anything else
1000 else:
1001 log.debug("Could not parse line: %s" % line.decode())
1002
1003 continue
1004
1005 # Fetch the extracted network and ASN
1006 network, autnum = m.groups()
1007
1008 # Decode into strings
1009 if network:
1010 network = network.decode()
1011 if autnum:
1012 autnum = autnum.decode()
1013
1014 # Collect all aggregated networks
1015 if not autnum:
1016 log.debug("%s is an aggregated network" % network)
1017 aggregated_networks.append(network)
1018 continue
1019
1020 # Insert it into the database
1021 self.db.execute("INSERT INTO announcements(network, autnum) \
1022 VALUES(%s, %s) ON CONFLICT (network) DO \
1023 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1024 network, autnum,
1025 )
1026
1027 # Process any aggregated networks
1028 for network in aggregated_networks:
1029 log.debug("Processing aggregated network %s" % network)
1030
1031 # Run "show route all" for each network
1032 for line in self._bird_cmd(server, "show route %s all" % network):
1033 # Try finding the path
1034 m = re.match(rb"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1035 if m:
1036 # Select the last AS number in the path
1037 autnum = m.group(1).decode()
1038
1039 # Insert it into the database
1040 self.db.execute("INSERT INTO announcements(network, autnum) \
1041 VALUES(%s, %s) ON CONFLICT (network) DO \
1042 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1043 network, autnum,
1044 )
1045
1046 # We don't need to process any more
1047 break
1048
1049 def _handle_update_announcements_from_telnet(self, server):
1050 # Pre-compile regular expression for routes
1051 route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
1052
1053 with telnetlib.Telnet(server) as t:
1054 # Enable debug mode
1055 #if ns.debug:
1056 # t.set_debuglevel(10)
1057
1058 # Wait for console greeting
1059 greeting = t.read_until(b"> ", timeout=30)
1060 if not greeting:
1061 log.error("Could not get a console prompt")
1062 return 1
1063
1064 # Disable pagination
1065 t.write(b"terminal length 0\n")
1066
1067 # Wait for the prompt to return
1068 t.read_until(b"> ")
1069
1070 # Fetch the routing tables
1071 for protocol in ("ipv6", "ipv4"):
1072 log.info("Requesting %s routing table" % protocol)
1073
1074 # Request the full unicast routing table
1075 t.write(b"show bgp %s unicast\n" % protocol.encode())
1076
1077 # Read entire header which ends with "Path"
1078 t.read_until(b"Path\r\n")
1079
1080 while True:
1081 # Try reading a full entry
1082 # Those might be broken across multiple lines but ends with i
1083 line = t.read_until(b"i\r\n", timeout=5)
1084 if not line:
1085 break
1086
1087 # Show line for debugging
1088 #log.debug(repr(line))
1089
1090 # Try finding a route in here
1091 m = route.match(line)
1092 if m:
1093 network, autnum = m.groups()
1094
1095 # Convert network to string
1096 network = network.decode()
1097
1098 # Append /24 for IPv4 addresses
1099 if not "/" in network and not ":" in network:
1100 network = "%s/24" % network
1101
1102 # Convert AS number to integer
1103 autnum = int(autnum)
1104
1105 log.info("Found announcement for %s by %s" % (network, autnum))
1106
1107 self.db.execute("INSERT INTO announcements(network, autnum) \
1108 VALUES(%s, %s) ON CONFLICT (network) DO \
1109 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1110 network, autnum,
1111 )
1112
1113 log.info("Finished reading the %s routing table" % protocol)
1114
1115 def _bird_cmd(self, socket_path, command):
1116 # Connect to the socket
1117 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1118 s.connect(socket_path)
1119
1120 # Allocate some buffer
1121 buffer = b""
1122
1123 log.debug("Sending Bird command: %s" % command)
1124
1125 # Send the command
1126 s.send(b"%s\n" % command.encode())
1127
1128 while True:
1129 # Fill up the buffer
1130 buffer += s.recv(4096)
1131
1132 while True:
1133 # Search for the next newline
1134 pos = buffer.find(b"\n")
1135
1136 # If we cannot find one, we go back and read more data
1137 if pos <= 0:
1138 break
1139
1140 # Cut after the newline character
1141 pos += 1
1142
1143 # Split the line we want and keep the rest in buffer
1144 line, buffer = buffer[:pos], buffer[pos:]
1145
1146 # Try parsing any status lines
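# (BIRD's control protocol prefixes replies with a four-digit code followed by either
# a space for the final line or a minus sign for continuation lines - hence the check
# for byte values 32 and 45 below.)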
1147 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1148 code, delim, line = int(line[:4]), line[4], line[5:]
1149
1150 log.debug("Received response code %s from bird" % code)
1151
1152 # End of output
1153 if code == 0:
1154 return
1155
1156 # Ignore hello line
1157 elif code == 1:
1158 continue
1159
1160 # Otherwise return the line
1161 yield line
1162
1163 def handle_update_overrides(self, ns):
1164 with self.db.transaction():
1165 # Drop all data that we have
1166 self.db.execute("""
1167 TRUNCATE TABLE autnum_overrides;
1168 TRUNCATE TABLE network_overrides;
1169 """)
1170
1171 # Update overrides for various cloud providers big enough to publish their own IP
1172 # network allocation lists in a machine-readable format...
1173 self._update_overrides_for_aws()
1174
1175 # Update overrides for Spamhaus DROP feeds...
1176 self._update_overrides_for_spamhaus_drop()
1177
1178 for file in ns.files:
1179 log.info("Reading %s..." % file)
1180
1181 with open(file, "rb") as f:
1182 for type, block in location.importer.read_blocks(f):
1183 if type == "net":
1184 network = block.get("net")
1185 # Try to parse and normalise the network
1186 try:
1187 network = ipaddress.ip_network(network, strict=False)
1188 except ValueError as e:
1189 log.warning("Invalid IP network: %s: %s" % (network, e))
1190 continue
1191
1192 # Prevent overwriting all networks with a default route
1193 if network.prefixlen == 0:
1194 log.warning("Skipping %s: You cannot overwrite default" % network)
1195 continue
1196
1197 self.db.execute("""
1198 INSERT INTO network_overrides(
1199 network,
1200 country,
1201 source,
1202 is_anonymous_proxy,
1203 is_satellite_provider,
1204 is_anycast,
1205 is_drop
1206 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1207 ON CONFLICT (network) DO NOTHING""",
1208 "%s" % network,
1209 block.get("country"),
1210 "manual",
1211 self._parse_bool(block, "is-anonymous-proxy"),
1212 self._parse_bool(block, "is-satellite-provider"),
1213 self._parse_bool(block, "is-anycast"),
1214 self._parse_bool(block, "drop"),
1215 )
1216
1217 elif type == "aut-num":
1218 autnum = block.get("aut-num")
1219
1220 # Check if AS number begins with "AS"
1221 if not autnum.startswith("AS"):
1222 log.warning("Invalid AS number: %s" % autnum)
1223 continue
1224
1225 # Strip "AS"
1226 autnum = autnum[2:]
1227
1228 self.db.execute("""
1229 INSERT INTO autnum_overrides(
1230 number,
1231 name,
1232 country,
1233 source,
1234 is_anonymous_proxy,
1235 is_satellite_provider,
1236 is_anycast,
1237 is_drop
1238 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1239 ON CONFLICT DO NOTHING""",
1240 autnum,
1241 block.get("name"),
1242 block.get("country"),
1243 "manual",
1244 self._parse_bool(block, "is-anonymous-proxy"),
1245 self._parse_bool(block, "is-satellite-provider"),
1246 self._parse_bool(block, "is-anycast"),
1247 self._parse_bool(block, "drop"),
1248 )
1249
1250 else:
1251 log.warning("Unsupported type: %s" % type)
1252
1253 def _update_overrides_for_aws(self):
1254 # Download Amazon AWS IP allocation file to create overrides...
1255 downloader = location.importer.Downloader()
1256
1257 try:
1258 with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
1259 aws_ip_dump = json.load(f.body)
1260 except Exception as e:
1261 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1262 return
1263
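# (For orientation: entries in ip-ranges.json look roughly like
# {"ip_prefix": "3.5.140.0/22", "region": "ap-northeast-2", "service": "AMAZON", ...},
# respectively {"ipv6_prefix": ...} for IPv6 - only the prefix and region fields
# are evaluated below.)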
1264 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1265 # there seems to be no machine-readable version available of this other than
1266 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1267 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1268 # was helpful here as well.
1269 aws_region_country_map = {
1270 "af-south-1": "ZA",
1271 "ap-east-1": "HK",
1272 "ap-south-1": "IN",
1273 "ap-south-2": "IN",
1274 "ap-northeast-3": "JP",
1275 "ap-northeast-2": "KR",
1276 "ap-southeast-1": "SG",
1277 "ap-southeast-2": "AU",
1278 "ap-southeast-3": "MY",
1279 "ap-southeast-4": "AU",
1280 "ap-northeast-1": "JP",
1281 "ca-central-1": "CA",
1282 "eu-central-1": "DE",
1283 "eu-central-2": "CH",
1284 "eu-west-1": "IE",
1285 "eu-west-2": "GB",
1286 "eu-south-1": "IT",
1287 "eu-south-2": "ES",
1288 "eu-west-3": "FR",
1289 "eu-north-1": "SE",
1290 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
1291 "me-central-1": "AE",
1292 "me-south-1": "BH",
1293 "sa-east-1": "BR"
1294 }
1295
1296 # Fetch all valid country codes to check parsed networks against...
1297 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1298 validcountries = []
1299
1300 for row in rows:
1301 validcountries.append(row.country_code)
1302
1303 with self.db.transaction():
1304 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1305 try:
1306 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1307 except ValueError:
1308 log.warning("Unable to parse line: %s" % snetwork)
1309 continue
1310
1311 # Sanitize parsed networks...
1312 if not self._check_parsed_network(network):
1313 continue
1314
1315 # Determine region of this network...
1316 region = snetwork["region"]
1317 cc = None
1318 is_anycast = False
1319
1320 # Any region name starting with "us-" will get "US" country code assigned straight away...
1321 if region.startswith("us-"):
1322 cc = "US"
1323 elif region.startswith("cn-"):
1324 # ... same goes for China ...
1325 cc = "CN"
1326 elif region == "GLOBAL":
1327 # ... funny region name for anycast-like networks ...
1328 is_anycast = True
1329 elif region in aws_region_country_map:
1330 # ... assign looked up country code otherwise ...
1331 cc = aws_region_country_map[region]
1332 else:
1333 # ... and bail out if we are missing something here
1334 log.warning("Unable to determine country code for line: %s" % snetwork)
1335 continue
1336
1337 # Skip networks with unknown country codes
1338 if not is_anycast and validcountries and cc not in validcountries:
1339 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1340 (cc, network))
1341 continue
1342
1343 # Conduct SQL statement...
1344 self.db.execute("""
1345 INSERT INTO network_overrides(
1346 network,
1347 country,
1348 source,
1349 is_anonymous_proxy,
1350 is_satellite_provider,
1351 is_anycast
1352 ) VALUES (%s, %s, %s, %s, %s, %s)
1353 ON CONFLICT (network) DO NOTHING""",
1354 "%s" % network,
1355 cc,
1356 "Amazon AWS IP feed",
1357 None,
1358 None,
1359 is_anycast,
1360 )
1361
1362
1363 def _update_overrides_for_spamhaus_drop(self):
1364 downloader = location.importer.Downloader()
1365
1366 ip_urls = [
1367 "https://www.spamhaus.org/drop/drop.txt",
1368 "https://www.spamhaus.org/drop/edrop.txt",
1369 "https://www.spamhaus.org/drop/dropv6.txt"
1370 ]
1371
1372 asn_urls = [
1373 "https://www.spamhaus.org/drop/asndrop.txt"
1374 ]
1375
1376 for url in ip_urls:
1377 try:
1378 with downloader.request(url, return_blocks=False) as f:
1379 fcontent = f.body.readlines()
1380 except Exception as e:
1381 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1382 return
1383
1384 # Iterate through every line, filter comments and add remaining networks to
1385 # the override table in case they are valid...
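# (A DROP entry typically reads something like "1.10.16.0/20 ; SBL256894", i.e. the
# network followed by a comment - only the first token is of interest here.)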
1386 with self.db.transaction():
1387 for sline in fcontent:
1388
1389 # The response is assumed to be encoded in UTF-8...
1390 sline = sline.decode("utf-8")
1391
1392 # Comments start with a semicolon...
1393 if sline.startswith(";"):
1394 continue
1395
1396 # Extract network and ignore anything afterwards...
1397 try:
1398 network = ipaddress.ip_network(sline.split()[0], strict=False)
1399 except ValueError:
1400 log.error("Unable to parse line: %s" % sline)
1401 continue
1402
1403 # Sanitize parsed networks...
1404 if not self._check_parsed_network(network):
1405 log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1406 (url, network))
1407 continue
1408
1409 # Conduct SQL statement...
1410 self.db.execute("""
1411 INSERT INTO network_overrides(
1412 network,
1413 source,
1414 is_drop
1415 ) VALUES (%s, %s, %s)
1416 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1417 "%s" % network,
1418 "Spamhaus DROP lists",
1419 True
1420 )
1421
1422 for url in asn_urls:
1423 try:
1424 with downloader.request(url, return_blocks=False) as f:
1425 fcontent = f.body.readlines()
1426 except Exception as e:
1427 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1428 return
1429
1430 # Iterate through every line, filter comments and add remaining ASNs to
1431 # the override table in case they are valid...
1432 with self.db.transaction():
1433 for sline in fcontent:
1434
1435 # The response is assumed to be encoded in UTF-8...
1436 sline = sline.decode("utf-8")
1437
1438 # Comments start with a semicolon...
1439 if sline.startswith(";"):
1440 continue
1441
1442 # Throw away anything after the first space...
1443 sline = sline.split()[0]
1444
1445 # ... strip the "AS" prefix from it ...
1446 sline = sline.strip("AS")
1447
1448 # ... and convert it into an integer. Voila.
1449 asn = int(sline)
1450
1451 # Filter invalid ASNs...
1452 if not self._check_parsed_asn(asn):
1453 log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1454 (url, asn))
1455 continue
1456
1457 # Conduct SQL statement...
1458 self.db.execute("""
1459 INSERT INTO autnum_overrides(
1460 number,
1461 source,
1462 is_drop
1463 ) VALUES (%s, %s, %s)
1464 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1465 "%s" % asn,
1466 "Spamhaus ASN-DROP list",
1467 True
1468 )
1469
1470 @staticmethod
1471 def _parse_bool(block, key):
1472 val = block.get(key)
1473
1474 # There is no point to proceed when we got None
1475 if val is None:
1476 return
1477
1478 # Convert to lowercase
1479 val = val.lower()
1480
1481 # True
1482 if val in ("yes", "1"):
1483 return True
1484
1485 # False
1486 if val in ("no", "0"):
1487 return False
1488
1489 # Default to None
1490 return None
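# (Illustrative behaviour: _parse_bool({"drop": "yes"}, "drop") returns True,
# _parse_bool({"drop": "no"}, "drop") returns False, and a missing or
# unrecognised value yields None.)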
1491
1492 def handle_import_countries(self, ns):
1493 with self.db.transaction():
1494 # Drop all data that we have
1495 self.db.execute("TRUNCATE TABLE countries")
1496
1497 for file in ns.file:
1498 for line in file:
1499 line = line.rstrip()
1500
1501 # Ignore any comments
1502 if line.startswith("#"):
1503 continue
1504
1505 try:
1506 country_code, continent_code, name = line.split(maxsplit=2)
1507 except:
1508 log.warning("Could not parse line: %s" % line)
1509 continue
1510
1511 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1512 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1513
1514
1515 def split_line(line):
1516 key, colon, val = line.partition(":")
1517
1518 # Strip any excess space
1519 key = key.strip()
1520 val = val.strip()
1521
1522 return key, val
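# (For example, split_line("country:   DE ") yields ("country", "DE").)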
1523
1524 def main():
1525 # Run the command line interface
1526 c = CLI()
1527 c.run()
1528
1529 main()