1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2022 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import json
23 import logging
24 import math
25 import re
26 import socket
27 import sys
28 import telnetlib
29
30 # Load our location module
31 import location
32 import location.database
33 import location.importer
34 from location.i18n import _
35
36 # Initialise logging
37 log = logging.getLogger("location.importer")
38 log.propagate = 1
39
40 # Define constants
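# Ranges of ASNs that IANA has set aside for public allocation; everything else
# (AS 0, AS_TRANS 23456, the 64496-131071 documentation/reserved/private 16-bit
# space, and the private/reserved 32-bit space from 4200000000 upwards) is
# treated as invalid by _check_parsed_asn() below.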
41 VALID_ASN_RANGES = (
42 (1, 23455),
43 (23457, 64495),
44 (131072, 4199999999),
45 )
46
47
48 class CLI(object):
49 def parse_cli(self):
50 parser = argparse.ArgumentParser(
51 description=_("Location Importer Command Line Interface"),
52 )
53 subparsers = parser.add_subparsers()
54
55 # Global configuration flags
56 parser.add_argument("--debug", action="store_true",
57 help=_("Enable debug output"))
58 parser.add_argument("--quiet", action="store_true",
59 help=_("Enable quiet mode"))
60
61 # version
62 parser.add_argument("--version", action="version",
63 version="%(prog)s @VERSION@")
64
65 # Database
66 parser.add_argument("--database-host", required=True,
67 help=_("Database Hostname"), metavar=_("HOST"))
68 parser.add_argument("--database-name", required=True,
69 help=_("Database Name"), metavar=_("NAME"))
70 parser.add_argument("--database-username", required=True,
71 help=_("Database Username"), metavar=_("USERNAME"))
72 parser.add_argument("--database-password", required=True,
73 help=_("Database Password"), metavar=_("PASSWORD"))
74
75 # Write Database
76 write = subparsers.add_parser("write", help=_("Write database to file"))
77 write.set_defaults(func=self.handle_write)
78 write.add_argument("file", nargs=1, help=_("Database File"))
79 write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
80 write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
81 write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
82 write.add_argument("--description", nargs="?", help=_("Sets a description"))
83 write.add_argument("--license", nargs="?", help=_("Sets the license"))
84 write.add_argument("--version", type=int, help=_("Database Format Version"))
85
86 # Update WHOIS
87 update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
88 update_whois.set_defaults(func=self.handle_update_whois)
89
90 # Update announcements
91 update_announcements = subparsers.add_parser("update-announcements",
92 			help=_("Update BGP Announcements"))
93 update_announcements.set_defaults(func=self.handle_update_announcements)
94 update_announcements.add_argument("server", nargs=1,
95 help=_("Route Server to connect to"), metavar=_("SERVER"))
96
97 # Update overrides
98 update_overrides = subparsers.add_parser("update-overrides",
99 help=_("Update overrides"),
100 )
101 update_overrides.add_argument(
102 "files", nargs="+", help=_("Files to import"),
103 )
104 update_overrides.set_defaults(func=self.handle_update_overrides)
105
106 # Import countries
107 import_countries = subparsers.add_parser("import-countries",
108 help=_("Import countries"),
109 )
110 import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
111 help=_("File to import"))
112 import_countries.set_defaults(func=self.handle_import_countries)
113
114 args = parser.parse_args()
115
116 # Configure logging
117 if args.debug:
118 location.logger.set_level(logging.DEBUG)
119 elif args.quiet:
120 location.logger.set_level(logging.WARNING)
121
122 # Print usage if no action was given
123 if not "func" in args:
124 parser.print_usage()
125 sys.exit(2)
126
127 return args
128
129 def run(self):
130 # Parse command line arguments
131 args = self.parse_cli()
132
133 # Initialise database
134 self.db = self._setup_database(args)
135
136 # Call function
137 ret = args.func(args)
138
139 # Return with exit code
140 if ret:
141 sys.exit(ret)
142
143 # Otherwise just exit
144 sys.exit(0)
145
146 def _setup_database(self, ns):
147 """
148 Initialise the database
149 """
150 # Connect to database
151 db = location.database.Connection(
152 host=ns.database_host, database=ns.database_name,
153 user=ns.database_username, password=ns.database_password,
154 )
155
156 with db.transaction():
157 db.execute("""
158 -- announcements
159 CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
160 first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
161 last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
162 CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
163 CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
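-- A GiST index with inet_ops lets PostgreSQL answer the containment
-- lookups (network <<= ...) used throughout this script efficiently.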
164 CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);
165
166 -- autnums
167 CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
168 ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text;
169 CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
170
171 -- countries
172 CREATE TABLE IF NOT EXISTS countries(
173 country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
174 CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);
175
176 -- networks
177 CREATE TABLE IF NOT EXISTS networks(network inet, country text);
178 ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
179 ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text;
180 CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
181 CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
182 CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);
183
184 -- overrides
185 CREATE TABLE IF NOT EXISTS autnum_overrides(
186 number bigint NOT NULL,
187 name text,
188 country text,
189 is_anonymous_proxy boolean,
190 is_satellite_provider boolean,
191 is_anycast boolean
192 );
193 CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
194 ON autnum_overrides(number);
195 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
196 ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
197
198 CREATE TABLE IF NOT EXISTS network_overrides(
199 network inet NOT NULL,
200 country text,
201 is_anonymous_proxy boolean,
202 is_satellite_provider boolean,
203 is_anycast boolean
204 );
205 CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
206 ON network_overrides(network);
207 CREATE INDEX IF NOT EXISTS network_overrides_search
208 ON network_overrides USING GIST(network inet_ops);
209 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
210 ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
211 """)
212
213 return db
214
215 def handle_write(self, ns):
216 """
217 Compiles a database in libloc format out of what is in the database
218 """
219 # Allocate a writer
220 writer = location.Writer(ns.signing_key, ns.backup_signing_key)
221
222 # Set all metadata
223 if ns.vendor:
224 writer.vendor = ns.vendor
225
226 if ns.description:
227 writer.description = ns.description
228
229 if ns.license:
230 writer.license = ns.license
231
232 # Add all Autonomous Systems
233 log.info("Writing Autonomous Systems...")
234
235 # Select all ASes with a name
236 rows = self.db.query("""
237 SELECT
238 autnums.number AS number,
239 COALESCE(
240 (SELECT overrides.name FROM autnum_overrides overrides
241 WHERE overrides.number = autnums.number),
242 autnums.name
243 ) AS name
244 FROM autnums
245 WHERE name <> %s ORDER BY number
246 """, "")
247
248 for row in rows:
249 a = writer.add_as(row.number)
250 a.name = row.name
251
252 # Add all networks
253 log.info("Writing networks...")
254
255 # Select all known networks
256 rows = self.db.query("""
257 -- Return a list of those networks enriched with all
258 -- other information that we store in the database
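-- For every column, COALESCE() prefers the most specific source:
-- a matching network override (longest prefix first), then an
-- override for the announcing AS, and finally the RIR data itself.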
259 SELECT
260 DISTINCT ON (network)
261 network,
262 autnum,
263
264 -- Country
265 COALESCE(
266 (
267 SELECT country FROM network_overrides overrides
268 WHERE networks.network <<= overrides.network
269 ORDER BY masklen(overrides.network) DESC
270 LIMIT 1
271 ),
272 (
273 SELECT country FROM autnum_overrides overrides
274 WHERE networks.autnum = overrides.number
275 ),
276 networks.country
277 ) AS country,
278
279 -- Flags
280 COALESCE(
281 (
282 SELECT is_anonymous_proxy FROM network_overrides overrides
283 WHERE networks.network <<= overrides.network
284 ORDER BY masklen(overrides.network) DESC
285 LIMIT 1
286 ),
287 (
288 SELECT is_anonymous_proxy FROM autnum_overrides overrides
289 WHERE networks.autnum = overrides.number
290 ),
291 FALSE
292 ) AS is_anonymous_proxy,
293 COALESCE(
294 (
295 SELECT is_satellite_provider FROM network_overrides overrides
296 WHERE networks.network <<= overrides.network
297 ORDER BY masklen(overrides.network) DESC
298 LIMIT 1
299 ),
300 (
301 SELECT is_satellite_provider FROM autnum_overrides overrides
302 WHERE networks.autnum = overrides.number
303 ),
304 FALSE
305 ) AS is_satellite_provider,
306 COALESCE(
307 (
308 SELECT is_anycast FROM network_overrides overrides
309 WHERE networks.network <<= overrides.network
310 ORDER BY masklen(overrides.network) DESC
311 LIMIT 1
312 ),
313 (
314 SELECT is_anycast FROM autnum_overrides overrides
315 WHERE networks.autnum = overrides.number
316 ),
317 FALSE
318 ) AS is_anycast,
319 COALESCE(
320 (
321 SELECT is_drop FROM network_overrides overrides
322 WHERE networks.network <<= overrides.network
323 ORDER BY masklen(overrides.network) DESC
324 LIMIT 1
325 ),
326 (
327 SELECT is_drop FROM autnum_overrides overrides
328 WHERE networks.autnum = overrides.number
329 ),
330 FALSE
331 ) AS is_drop
332 FROM (
333 SELECT
334 known_networks.network AS network,
335 announcements.autnum AS autnum,
336 networks.country AS country,
337
338 -- Must be part of returned values for ORDER BY clause
339 masklen(announcements.network) AS sort_a,
340 masklen(networks.network) AS sort_b
341 FROM (
342 SELECT network FROM announcements
343 UNION ALL
344 SELECT network FROM networks
345 UNION ALL
346 SELECT network FROM network_overrides
347 ) known_networks
348 LEFT JOIN
349 announcements ON known_networks.network <<= announcements.network
350 LEFT JOIN
351 networks ON known_networks.network <<= networks.network
352 ORDER BY
353 known_networks.network,
354 sort_a DESC,
355 sort_b DESC
356 ) networks
357 """)
358
359 for row in rows:
360 network = writer.add_network(row.network)
361
362 # Save country
363 if row.country:
364 network.country_code = row.country
365
366 # Save ASN
367 if row.autnum:
368 network.asn = row.autnum
369
370 # Set flags
371 if row.is_anonymous_proxy:
372 network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)
373
374 if row.is_satellite_provider:
375 network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
376
377 if row.is_anycast:
378 network.set_flag(location.NETWORK_FLAG_ANYCAST)
379
380 if row.is_drop:
381 network.set_flag(location.NETWORK_FLAG_DROP)
382
383 # Add all countries
384 log.info("Writing countries...")
385 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
386
387 for row in rows:
388 c = writer.add_country(row.country_code)
389 c.continent_code = row.continent_code
390 c.name = row.name
391
392 # Write everything to file
393 log.info("Writing database to file...")
394 for file in ns.file:
395 writer.write(file)
396
397 def handle_update_whois(self, ns):
398 downloader = location.importer.Downloader()
399
400 # Download all sources
401 with self.db.transaction():
402 # Create some temporary tables to store parsed data
403 self.db.execute("""
404 CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
405 ON COMMIT DROP;
406 CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
407
408 CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
409 ON COMMIT DROP;
410 CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
411
412 CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
413 ON COMMIT DROP;
414 CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
415 CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
416 """)
417
418 # Remove all previously imported content
419 self.db.execute("""
420 TRUNCATE TABLE networks;
421 """)
422
423 			# Fetch all valid country codes to check parsed networks against...
424 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
425 validcountries = []
426
427 for row in rows:
428 validcountries.append(row.country_code)
429
430 for source_key in location.importer.WHOIS_SOURCES:
431 for single_url in location.importer.WHOIS_SOURCES[source_key]:
432 with downloader.request(single_url, return_blocks=True) as f:
433 for block in f:
434 self._parse_block(block, source_key, validcountries)
435
436 # Process all parsed networks from every RIR we happen to have access to,
437 # insert the largest network chunks into the networks table immediately...
438 families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
439
440 for family in (row.family for row in families):
441 smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
442
443 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
444 SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
445
446 # ... determine any other prefixes for this network family, ...
447 prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
448 WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
449
450 				# ... and insert networks with this prefix in case they provide additional
451 				# information (i.e. a subnet of a larger chunk with a different country)
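# (For example, if a /16 is registered to one country but contains a /24
# delegated to a different one, the /24 is inserted as well, while subnets
# that merely repeat their parent's country are dropped by the query below.)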
452 for prefix in (row.prefix for row in prefixes):
453 self.db.execute("""
454 WITH candidates AS (
455 SELECT
456 _rirdata.network,
457 _rirdata.country,
458 _rirdata.original_countries,
459 _rirdata.source
460 FROM
461 _rirdata
462 WHERE
463 family(_rirdata.network) = %s
464 AND
465 masklen(_rirdata.network) = %s
466 ),
467 filtered AS (
468 SELECT
469 DISTINCT ON (c.network)
470 c.network,
471 c.country,
472 c.original_countries,
473 c.source,
474 masklen(networks.network),
475 networks.country AS parent_country
476 FROM
477 candidates c
478 LEFT JOIN
479 networks
480 ON
481 c.network << networks.network
482 ORDER BY
483 c.network,
484 masklen(networks.network) DESC NULLS LAST
485 )
486 INSERT INTO
487 networks(network, country, original_countries, source)
488 SELECT
489 network,
490 country,
491 original_countries,
492 source
493 FROM
494 filtered
495 WHERE
496 parent_country IS NULL
497 OR
498 country <> parent_country
499 ON CONFLICT DO NOTHING""",
500 family, prefix,
501 )
502
503 self.db.execute("""
504 INSERT INTO autnums(number, name, source)
505 SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
506 JOIN _organizations ON _autnums.organization = _organizations.handle
507 ON CONFLICT (number) DO UPDATE SET name = excluded.name;
508 """)
509
510 # Download all extended sources
511 for source_key in location.importer.EXTENDED_SOURCES:
512 for single_url in location.importer.EXTENDED_SOURCES[source_key]:
513 with self.db.transaction():
514 # Download data
515 with downloader.request(single_url) as f:
516 for line in f:
517 self._parse_line(line, source_key, validcountries)
518
519 # Download and import (technical) AS names from ARIN
520 self._import_as_names_from_arin()
521
522 def _check_parsed_network(self, network):
523 """
524 Assistive function to detect and subsequently sort out parsed
525 networks from RIR data (both Whois and so-called "extended sources"),
526 		which...
527
528 		(a) are not globally routable (RFC 1918 space, et al.),
529 		(b) cover too large a chunk of the IP address space (prefix length
530 			is < 7 for IPv4 networks, and < 10 for IPv6),
531 		(c) have "0.0.0.0" or "::" as their network address, or
532 		(d) are too small to be publicly announced (we have decided not to
533 			process them at the moment, as they significantly enlarge our
534 			database without providing very helpful additional information).
535
536 This unfortunately is necessary due to brain-dead clutter across
537 various RIR databases, causing mismatches and eventually disruptions.
538
539 		Returns False in case a network is not suitable for adding to our
540 		database, and True otherwise.
541 """
542
543 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
544 return False
545
546 if not network.is_global:
547 log.debug("Skipping non-globally routable network: %s" % network)
548 return False
549
550 if network.version == 4:
551 if network.prefixlen < 7:
552 log.debug("Skipping too big IP chunk: %s" % network)
553 return False
554
555 if network.prefixlen > 24:
556 log.debug("Skipping network too small to be publicly announced: %s" % network)
557 return False
558
559 if str(network.network_address) == "0.0.0.0":
560 log.debug("Skipping network based on 0.0.0.0: %s" % network)
561 return False
562
563 elif network.version == 6:
564 if network.prefixlen < 10:
565 log.debug("Skipping too big IP chunk: %s" % network)
566 return False
567
568 if network.prefixlen > 48:
569 log.debug("Skipping network too small to be publicly announced: %s" % network)
570 return False
571
572 if str(network.network_address) == "::":
573 log.debug("Skipping network based on '::': %s" % network)
574 return False
575
576 else:
577 # This should not happen...
578 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
579 return False
580
581 # In case we have made it here, the network is considered to
582 # be suitable for libloc consumption...
583 return True
584
585 def _check_parsed_asn(self, asn):
586 """
587 		Assistive function to filter out Autonomous System Numbers that are not
588 		suitable for adding to our database. Returns False in such cases, and True otherwise.
589 """
590
591 for start, end in VALID_ASN_RANGES:
592 			if start <= asn <= end:
593 return True
594
595 log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn)
596 return False
597
598 def _parse_block(self, block, source_key, validcountries = None):
599 # Get first line to find out what type of block this is
600 line = block[0]
601
602 # aut-num
603 if line.startswith("aut-num:"):
604 return self._parse_autnum_block(block, source_key)
605
606 # inetnum
607 if line.startswith("inet6num:") or line.startswith("inetnum:"):
608 return self._parse_inetnum_block(block, source_key, validcountries)
609
610 # organisation
611 elif line.startswith("organisation:"):
612 return self._parse_org_block(block, source_key)
613
614 def _parse_autnum_block(self, block, source_key):
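# An aut-num block is a set of RPSL "key: value" attributes, roughly of this
# shape (values are fictitious, for illustration only):
#
#	aut-num:    AS65530
#	org:        ORG-EXMP1-RIPE
#	descr:      Example Networks
#
# We only care about the AS number, the organisation handle and the first
# description line.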
615 autnum = {}
616 for line in block:
617 # Split line
618 key, val = split_line(line)
619
620 if key == "aut-num":
621 m = re.match(r"^(AS|as)(\d+)", val)
622 if m:
623 autnum["asn"] = m.group(2)
624
625 elif key == "org":
626 autnum[key] = val.upper()
627
628 elif key == "descr":
629 # Save the first description line as well...
630 if not key in autnum:
631 autnum[key] = val
632
633 # Skip empty objects
634 if not autnum or not "asn" in autnum:
635 return
636
637 # Insert a dummy organisation handle into our temporary organisations
638 # table in case the AS does not have an organisation handle set, but
639 # has a description (a quirk often observed in APNIC area), so we can
640 # later display at least some string for this AS.
641 if not "org" in autnum:
642 if "descr" in autnum:
643 autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn")
644
645 self.db.execute("INSERT INTO _organizations(handle, name, source) \
646 VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING",
647 autnum.get("org"), autnum.get("descr"), source_key,
648 )
649 else:
650 log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \
651 autnum.get("asn"))
652 return
653
654 # Insert into database
655 self.db.execute("INSERT INTO _autnums(number, organization, source) \
656 VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
657 organization = excluded.organization",
658 autnum.get("asn"), autnum.get("org"), source_key,
659 )
660
661 def _parse_inetnum_block(self, block, source_key, validcountries = None):
662 log.debug("Parsing inetnum block:")
663
664 inetnum = {}
665 for line in block:
666 log.debug(line)
667
668 # Split line
669 key, val = split_line(line)
670
671 # Filter any inetnum records which are only referring to IP space
672 # not managed by that specific RIR...
673 if key == "netname":
674 if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()):
675 log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
676 return
677
678 if key == "inetnum":
679 start_address, delim, end_address = val.partition("-")
680
681 # Strip any excess space
682 start_address, end_address = start_address.rstrip(), end_address.strip()
683
684 # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22")
685 if start_address and not (delim or end_address):
686 try:
687 start_address = ipaddress.ip_network(start_address, strict=False)
688 except ValueError:
689 start_address = start_address.split("/")
690 ldigits = start_address[0].count(".")
691
692 # How many octets do we need to add?
693 # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.)
694 if ldigits == 1:
695 start_address = start_address[0] + ".0.0/" + start_address[1]
696 elif ldigits == 2:
697 start_address = start_address[0] + ".0/" + start_address[1]
698 else:
699 log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line)
700 return
701
702 try:
703 start_address = ipaddress.ip_network(start_address, strict=False)
704 except ValueError:
705 log.warning("Could not parse line in LACNIC DB format: %s" % line)
706 return
707
708 # Enumerate first and last IP address of this network
709 end_address = start_address[-1]
710 start_address = start_address[0]
711
712 else:
713 # Convert to IP address
714 try:
715 start_address = ipaddress.ip_address(start_address)
716 end_address = ipaddress.ip_address(end_address)
717 except ValueError:
718 log.warning("Could not parse line: %s" % line)
719 return
720
721 inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
722
723 elif key == "inet6num":
724 inetnum[key] = [ipaddress.ip_network(val, strict=False)]
725
726 elif key == "country":
727 val = val.upper()
728
729 # Catch RIR data objects with more than one country code...
730 if not key in inetnum:
731 inetnum[key] = []
732 else:
733 if val in inetnum.get("country"):
734 # ... but keep this list distinct...
735 continue
736
737 # When people set country codes to "UK", they actually mean "GB"
738 if val == "UK":
739 val = "GB"
740
741 inetnum[key].append(val)
742
743 # Skip empty objects
744 if not inetnum or not "country" in inetnum:
745 return
746
747 # Prepare skipping objects with unknown country codes...
748 invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries]
749
750 # Iterate through all networks enumerated from above, check them for plausibility and insert
751 # them into the database, if _check_parsed_network() succeeded
752 for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
753 if self._check_parsed_network(single_network):
754
755 				# Skip objects with unknown country codes (but only when we have a list of valid ones, to avoid log spam)...
756 if validcountries and invalidcountries:
757 log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
758 (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
759 break
760
761 # Everything is fine here, run INSERT statement...
762 self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
763 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
764 "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
765 )
766
767 def _parse_org_block(self, block, source_key):
768 org = {}
769 for line in block:
770 # Split line
771 key, val = split_line(line)
772
773 if key == "organisation":
774 org[key] = val.upper()
775 elif key == "org-name":
776 org[key] = val
777
778 # Skip empty objects
779 if not org:
780 return
781
782 self.db.execute("INSERT INTO _organizations(handle, name, source) \
783 VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
784 UPDATE SET name = excluded.name",
785 org.get("organisation"), org.get("org-name"), source_key,
786 )
787
788 def _parse_line(self, line, source_key, validcountries = None):
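# Lines in the RIR "delegated-extended" statistics files look like
#	registry|cc|type|start|value|date|status[|opaque-id]
# preceded by a version line (which starts with the format version, "2")
# and summary lines (which carry "*" in the country-code field).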
789 # Skip version line
790 if line.startswith("2"):
791 return
792
793 # Skip comments
794 if line.startswith("#"):
795 return
796
797 try:
798 registry, country_code, type, line = line.split("|", 3)
799 		except ValueError:
800 log.warning("Could not parse line: %s" % line)
801 return
802
803 # Skip any lines that are for stats only or do not have a country
804 # code at all (avoids log spam below)
805 if not country_code or country_code == '*':
806 return
807
808 # Skip objects with unknown country codes
809 if validcountries and country_code not in validcountries:
810 log.warning("Skipping line with bogus country '%s': %s" % \
811 (country_code, line))
812 return
813
814 if type in ("ipv6", "ipv4"):
815 return self._parse_ip_line(country_code, type, line, source_key)
816
817 def _parse_ip_line(self, country, type, line, source_key):
818 try:
819 address, prefix, date, status, organization = line.split("|")
820 except ValueError:
821 organization = None
822
823 # Try parsing the line without organization
824 try:
825 address, prefix, date, status = line.split("|")
826 except ValueError:
827 log.warning("Unhandled line format: %s" % line)
828 return
829
830 # Skip anything that isn't properly assigned
831 if not status in ("assigned", "allocated"):
832 return
833
834 # Cast prefix into an integer
835 try:
836 prefix = int(prefix)
837 		except ValueError:
838 log.warning("Invalid prefix: %s" % prefix)
839 return
840
841 # Fix prefix length for IPv4
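# (In the extended format, the "value" field for IPv4 is the number of
# addresses in the block rather than a prefix length, so e.g. a value of
# 1024 becomes a /22, because 32 - log2(1024) = 22.)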
842 if type == "ipv4":
843 prefix = 32 - int(math.log(prefix, 2))
844
845 # Try to parse the address
846 try:
847 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
848 except ValueError:
849 log.warning("Invalid IP address: %s" % address)
850 return
851
852 if not self._check_parsed_network(network):
853 return
854
855 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
856 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
857 UPDATE SET country = excluded.country",
858 "%s" % network, country, [country], source_key,
859 )
860
861 def _import_as_names_from_arin(self):
862 downloader = location.importer.Downloader()
863
864 		# XXX: Download AS names file from ARIN (note that these names appear to be quite
865 		# technical and not intended for human consumption, unlike the description fields
866 		# in organisation handles for other RIRs - however, this is what we have got, and
867 		# in some cases, it might still be better than nothing)
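# Data lines in asn.txt start with whitespace, followed by the AS number and
# its (technical) name, e.g. roughly (illustrative):
#	13335   CLOUDFLARENET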
868 with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
869 for line in f:
870 # Convert binary line to string...
871 				line = line.decode("utf-8", "replace") if isinstance(line, bytes) else line
872
873 # ... valid lines start with a space, followed by the number of the Autonomous System ...
874 if not line.startswith(" "):
875 continue
876
877 # Split line and check if there is a valid ASN in it...
878 asn, name = line.split()[0:2]
879
880 try:
881 asn = int(asn)
882 except ValueError:
883 log.debug("Skipping ARIN AS names line not containing an integer for ASN")
884 continue
885
886 # Filter invalid ASNs...
887 if not self._check_parsed_asn(asn):
888 continue
889
890 # Skip any AS name that appears to be a placeholder for a different RIR or entity...
891 if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name):
892 continue
893
894 # Bail out in case the AS name contains anything we do not expect here...
895 if re.search(r"[^a-zA-Z0-9-_]", name):
896 					log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % (asn, name))
897 					continue
898
899 # Things look good here, run INSERT statement and skip this one if we already have
900 # a (better?) name for this Autonomous System...
901 self.db.execute("""
902 INSERT INTO autnums(
903 number,
904 name,
905 source
906 ) VALUES (%s, %s, %s)
907 ON CONFLICT (number) DO NOTHING""",
908 asn,
909 name,
910 "ARIN",
911 )
912
913 def handle_update_announcements(self, ns):
914 server = ns.server[0]
915
916 with self.db.transaction():
917 if server.startswith("/"):
918 self._handle_update_announcements_from_bird(server)
919 else:
920 self._handle_update_announcements_from_telnet(server)
921
922 # Purge anything we never want here
923 self.db.execute("""
924 -- Delete default routes
925 DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';
926
927 -- Delete anything that is not global unicast address space
928 DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';
929
930 -- DELETE "current network" address space
931 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';
932
933 -- DELETE local loopback address space
934 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';
935
936 -- DELETE RFC 1918 address space
937 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
938 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
939 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';
940
941 -- DELETE test, benchmark and documentation address space
942 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
943 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
944 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
945 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
946 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';
947
948 -- DELETE CGNAT address space (RFC 6598)
949 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';
950
951 -- DELETE link local address space
952 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';
953
954 -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
955 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
956 DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';
957
958 -- DELETE multicast and reserved address space
959 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
960 DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';
961
962 -- Delete networks that are too small to be in the global routing table
963 DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
964 DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;
965
966 -- Delete any non-public or reserved ASNs
967 DELETE FROM announcements WHERE NOT (
968 (autnum >= 1 AND autnum <= 23455)
969 OR
970 (autnum >= 23457 AND autnum <= 64495)
971 OR
972 (autnum >= 131072 AND autnum <= 4199999999)
973 );
974
975 -- Delete everything that we have not seen for 14 days
976 DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
977 """)
978
979 def _handle_update_announcements_from_bird(self, server):
980 # Pre-compile the regular expression for faster searching
981 		route = re.compile(rb"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$")
982
983 log.info("Requesting routing table from Bird (%s)" % server)
984
985 aggregated_networks = []
986
987 # Send command to list all routes
988 for line in self._bird_cmd(server, "show route"):
989 m = route.match(line)
990 if not m:
991 log.debug("Could not parse line: %s" % line.decode())
992 continue
993
994 # Fetch the extracted network and ASN
995 network, autnum = m.groups()
996
997 # Decode into strings
998 if network:
999 network = network.decode()
1000 if autnum:
1001 autnum = autnum.decode()
1002
1003 # Collect all aggregated networks
1004 if not autnum:
1005 log.debug("%s is an aggregated network" % network)
1006 aggregated_networks.append(network)
1007 continue
1008
1009 # Insert it into the database
1010 self.db.execute("INSERT INTO announcements(network, autnum) \
1011 VALUES(%s, %s) ON CONFLICT (network) DO \
1012 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1013 network, autnum,
1014 )
1015
1016 # Process any aggregated networks
1017 for network in aggregated_networks:
1018 log.debug("Processing aggregated network %s" % network)
1019
1020 # Run "show route all" for each network
1021 for line in self._bird_cmd(server, "show route %s all" % network):
1022 # Try finding the path
1023 				m = re.match(rb"\s+BGP\.as_path:.* (\d+) {\d+}$", line)
1024 if m:
1025 # Select the last AS number in the path
1026 autnum = m.group(1).decode()
1027
1028 # Insert it into the database
1029 self.db.execute("INSERT INTO announcements(network, autnum) \
1030 VALUES(%s, %s) ON CONFLICT (network) DO \
1031 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1032 network, autnum,
1033 )
1034
1035 # We don't need to process any more
1036 break
1037
1038 def _handle_update_announcements_from_telnet(self, server):
1039 # Pre-compile regular expression for routes
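# (This matches IOS-style "show bgp" table entries of roughly the form
# "*>i<prefix>   <next hop>   ...   <AS> i", capturing the prefix and the
# AS number right before the trailing origin code.)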
1040 		route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)
1041
1042 with telnetlib.Telnet(server) as t:
1043 # Enable debug mode
1044 #if ns.debug:
1045 # t.set_debuglevel(10)
1046
1047 # Wait for console greeting
1048 greeting = t.read_until(b"> ", timeout=30)
1049 if not greeting:
1050 log.error("Could not get a console prompt")
1051 return 1
1052
1053 # Disable pagination
1054 t.write(b"terminal length 0\n")
1055
1056 # Wait for the prompt to return
1057 t.read_until(b"> ")
1058
1059 # Fetch the routing tables
1060 for protocol in ("ipv6", "ipv4"):
1061 log.info("Requesting %s routing table" % protocol)
1062
1063 # Request the full unicast routing table
1064 t.write(b"show bgp %s unicast\n" % protocol.encode())
1065
1066 # Read entire header which ends with "Path"
1067 t.read_until(b"Path\r\n")
1068
1069 while True:
1070 # Try reading a full entry
1071 # Those might be broken across multiple lines but ends with i
1072 line = t.read_until(b"i\r\n", timeout=5)
1073 if not line:
1074 break
1075
1076 # Show line for debugging
1077 #log.debug(repr(line))
1078
1079 # Try finding a route in here
1080 m = route.match(line)
1081 if m:
1082 network, autnum = m.groups()
1083
1084 # Convert network to string
1085 network = network.decode()
1086
1087 # Append /24 for IPv4 addresses
1088 if not "/" in network and not ":" in network:
1089 network = "%s/24" % network
1090
1091 # Convert AS number to integer
1092 autnum = int(autnum)
1093
1094 log.info("Found announcement for %s by %s" % (network, autnum))
1095
1096 self.db.execute("INSERT INTO announcements(network, autnum) \
1097 VALUES(%s, %s) ON CONFLICT (network) DO \
1098 UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
1099 network, autnum,
1100 )
1101
1102 log.info("Finished reading the %s routing table" % protocol)
1103
1104 def _bird_cmd(self, socket_path, command):
1105 # Connect to the socket
1106 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
1107 s.connect(socket_path)
1108
1109 # Allocate some buffer
1110 buffer = b""
1111
1112 log.debug("Sending Bird command: %s" % command)
1113
1114 # Send the command
1115 s.send(b"%s\n" % command.encode())
1116
1117 while True:
1118 # Fill up the buffer
1119 buffer += s.recv(4096)
1120
1121 while True:
1122 # Search for the next newline
1123 pos = buffer.find(b"\n")
1124
1125 # If we cannot find one, we go back and read more data
1126 if pos <= 0:
1127 break
1128
1129 # Cut after the newline character
1130 pos += 1
1131
1132 # Split the line we want and keep the rest in buffer
1133 line, buffer = buffer[:pos], buffer[pos:]
1134
1135 # Try parsing any status lines
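# (Bird's control protocol prefixes reply lines with a four-digit code
# followed by either "-" (byte 45) for continuation lines or a space
# (byte 32) for the final line of a reply - hence the check below.)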
1136 if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45):
1137 code, delim, line = int(line[:4]), line[4], line[5:]
1138
1139 log.debug("Received response code %s from bird" % code)
1140
1141 # End of output
1142 if code == 0:
1143 return
1144
1145 # Ignore hello line
1146 elif code == 1:
1147 continue
1148
1149 # Otherwise return the line
1150 yield line
1151
1152 def handle_update_overrides(self, ns):
1153 with self.db.transaction():
1154 # Drop all data that we have
1155 self.db.execute("""
1156 TRUNCATE TABLE autnum_overrides;
1157 TRUNCATE TABLE network_overrides;
1158 """)
1159
1160 # Update overrides for various cloud providers big enough to publish their own IP
1161 # network allocation lists in a machine-readable format...
1162 self._update_overrides_for_aws()
1163
1164 # Update overrides for Spamhaus DROP feeds...
1165 self._update_overrides_for_spamhaus_drop()
1166
1167 for file in ns.files:
1168 log.info("Reading %s..." % file)
1169
1170 with open(file, "rb") as f:
1171 for type, block in location.importer.read_blocks(f):
1172 if type == "net":
1173 network = block.get("net")
1174 # Try to parse and normalise the network
1175 try:
1176 network = ipaddress.ip_network(network, strict=False)
1177 except ValueError as e:
1178 log.warning("Invalid IP network: %s: %s" % (network, e))
1179 continue
1180
1181 						# Prevent a /0 override from overwriting all networks
1182 if network.prefixlen == 0:
1183 log.warning("Skipping %s: You cannot overwrite default" % network)
1184 continue
1185
1186 self.db.execute("""
1187 INSERT INTO network_overrides(
1188 network,
1189 country,
1190 source,
1191 is_anonymous_proxy,
1192 is_satellite_provider,
1193 is_anycast,
1194 is_drop
1195 ) VALUES (%s, %s, %s, %s, %s, %s, %s)
1196 ON CONFLICT (network) DO NOTHING""",
1197 "%s" % network,
1198 block.get("country"),
1199 "manual",
1200 self._parse_bool(block, "is-anonymous-proxy"),
1201 self._parse_bool(block, "is-satellite-provider"),
1202 self._parse_bool(block, "is-anycast"),
1203 self._parse_bool(block, "drop"),
1204 )
1205
1206 elif type == "aut-num":
1207 autnum = block.get("aut-num")
1208
1209 # Check if AS number begins with "AS"
1210 if not autnum.startswith("AS"):
1211 log.warning("Invalid AS number: %s" % autnum)
1212 continue
1213
1214 # Strip "AS"
1215 autnum = autnum[2:]
1216
1217 self.db.execute("""
1218 INSERT INTO autnum_overrides(
1219 number,
1220 name,
1221 country,
1222 source,
1223 is_anonymous_proxy,
1224 is_satellite_provider,
1225 is_anycast,
1226 is_drop
1227 ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
1228 ON CONFLICT DO NOTHING""",
1229 autnum,
1230 block.get("name"),
1231 block.get("country"),
1232 "manual",
1233 self._parse_bool(block, "is-anonymous-proxy"),
1234 self._parse_bool(block, "is-satellite-provider"),
1235 self._parse_bool(block, "is-anycast"),
1236 self._parse_bool(block, "drop"),
1237 )
1238
1239 else:
1240 log.warning("Unsupported type: %s" % type)
1241
1242 def _update_overrides_for_aws(self):
1243 # Download Amazon AWS IP allocation file to create overrides...
1244 downloader = location.importer.Downloader()
1245
1246 try:
1247 with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
1248 aws_ip_dump = json.load(f.body)
1249 except Exception as e:
1250 log.error("unable to preprocess Amazon AWS IP ranges: %s" % e)
1251 return
1252
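# The dump contains two lists, "prefixes" and "ipv6_prefixes"; each entry is an
# object along the lines of (values illustrative)
#	{ "ip_prefix": "3.5.140.0/22", "region": "ap-northeast-2", "service": "AMAZON" }
# of which only the prefix and the region are used below.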
1253 # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
1254 # there seems to be no machine-readable version available of this other than
1255 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
1256 # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
1257 # was helpful here as well.
1258 aws_region_country_map = {
1259 "af-south-1": "ZA",
1260 "ap-east-1": "HK",
1261 "ap-south-1": "IN",
1262 "ap-south-2": "IN",
1263 "ap-northeast-3": "JP",
1264 "ap-northeast-2": "KR",
1265 "ap-southeast-1": "SG",
1266 "ap-southeast-2": "AU",
1267 "ap-southeast-3": "MY",
1268 "ap-southeast-4": "AU",
1269 "ap-northeast-1": "JP",
1270 "ca-central-1": "CA",
1271 "eu-central-1": "DE",
1272 "eu-central-2": "CH",
1273 "eu-west-1": "IE",
1274 "eu-west-2": "GB",
1275 "eu-south-1": "IT",
1276 "eu-south-2": "ES",
1277 "eu-west-3": "FR",
1278 "eu-north-1": "SE",
1279 "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
1280 "me-central-1": "AE",
1281 "me-south-1": "BH",
1282 "sa-east-1": "BR"
1283 }
1284
1285 		# Fetch all valid country codes to check parsed networks against...
1286 rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
1287 validcountries = []
1288
1289 for row in rows:
1290 validcountries.append(row.country_code)
1291
1292 with self.db.transaction():
1293 for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
1294 try:
1295 network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
1296 except ValueError:
1297 log.warning("Unable to parse line: %s" % snetwork)
1298 continue
1299
1300 # Sanitize parsed networks...
1301 if not self._check_parsed_network(network):
1302 continue
1303
1304 # Determine region of this network...
1305 region = snetwork["region"]
1306 cc = None
1307 is_anycast = False
1308
1309 # Any region name starting with "us-" will get "US" country code assigned straight away...
1310 if region.startswith("us-"):
1311 cc = "US"
1312 elif region.startswith("cn-"):
1313 # ... same goes for China ...
1314 cc = "CN"
1315 elif region == "GLOBAL":
1316 # ... funny region name for anycast-like networks ...
1317 is_anycast = True
1318 elif region in aws_region_country_map:
1319 # ... assign looked up country code otherwise ...
1320 cc = aws_region_country_map[region]
1321 else:
1322 # ... and bail out if we are missing something here
1323 log.warning("Unable to determine country code for line: %s" % snetwork)
1324 continue
1325
1326 # Skip networks with unknown country codes
1327 if not is_anycast and validcountries and cc not in validcountries:
1328 log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
1329 (cc, network))
1330 					continue
1331
1332 # Conduct SQL statement...
1333 self.db.execute("""
1334 INSERT INTO network_overrides(
1335 network,
1336 country,
1337 source,
1338 is_anonymous_proxy,
1339 is_satellite_provider,
1340 is_anycast
1341 ) VALUES (%s, %s, %s, %s, %s, %s)
1342 ON CONFLICT (network) DO NOTHING""",
1343 "%s" % network,
1344 cc,
1345 "Amazon AWS IP feed",
1346 None,
1347 None,
1348 is_anycast,
1349 )
1350
1351
1352 def _update_overrides_for_spamhaus_drop(self):
1353 downloader = location.importer.Downloader()
1354
1355 ip_urls = [
1356 "https://www.spamhaus.org/drop/drop.txt",
1357 "https://www.spamhaus.org/drop/edrop.txt",
1358 "https://www.spamhaus.org/drop/dropv6.txt"
1359 ]
1360
1361 asn_urls = [
1362 "https://www.spamhaus.org/drop/asndrop.txt"
1363 ]
1364
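# Both the DROP/EDROP files ("<network> ; <SBL reference>") and the ASN-DROP
# file ("AS<number> ; <name>") use ";" to introduce comments, which is why
# only the first token of each non-comment line is evaluated below.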
1365 for url in ip_urls:
1366 try:
1367 with downloader.request(url, return_blocks=False) as f:
1368 fcontent = f.body.readlines()
1369 except Exception as e:
1370 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1371 return
1372
1373 # Iterate through every line, filter comments and add remaining networks to
1374 # the override table in case they are valid...
1375 with self.db.transaction():
1376 for sline in fcontent:
1377
1378 # The response is assumed to be encoded in UTF-8...
1379 sline = sline.decode("utf-8")
1380
1381 # Comments start with a semicolon...
1382 if sline.startswith(";"):
1383 continue
1384
1385 # Extract network and ignore anything afterwards...
1386 try:
1387 network = ipaddress.ip_network(sline.split()[0], strict=False)
1388 except ValueError:
1389 log.error("Unable to parse line: %s" % sline)
1390 continue
1391
1392 # Sanitize parsed networks...
1393 if not self._check_parsed_network(network):
1394 log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
1395 (url, network))
1396 continue
1397
1398 # Conduct SQL statement...
1399 self.db.execute("""
1400 INSERT INTO network_overrides(
1401 network,
1402 source,
1403 is_drop
1404 ) VALUES (%s, %s, %s)
1405 ON CONFLICT (network) DO UPDATE SET is_drop = True""",
1406 "%s" % network,
1407 "Spamhaus DROP lists",
1408 True
1409 )
1410
1411 for url in asn_urls:
1412 try:
1413 with downloader.request(url, return_blocks=False) as f:
1414 fcontent = f.body.readlines()
1415 except Exception as e:
1416 log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
1417 return
1418
1419 # Iterate through every line, filter comments and add remaining ASNs to
1420 # the override table in case they are valid...
1421 with self.db.transaction():
1422 for sline in fcontent:
1423
1424 # The response is assumed to be encoded in UTF-8...
1425 sline = sline.decode("utf-8")
1426
1427 # Comments start with a semicolon...
1428 if sline.startswith(";"):
1429 continue
1430
1431 # Throw away anything after the first space...
1432 sline = sline.split()[0]
1433
1434 # ... strip the "AS" prefix from it ...
1435 sline = sline.strip("AS")
1436
1437 # ... and convert it into an integer. Voila.
1438 asn = int(sline)
1439
1440 # Filter invalid ASNs...
1441 if not self._check_parsed_asn(asn):
1442 log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \
1443 (url, asn))
1444 continue
1445
1446 # Conduct SQL statement...
1447 self.db.execute("""
1448 INSERT INTO autnum_overrides(
1449 number,
1450 source,
1451 is_drop
1452 ) VALUES (%s, %s, %s)
1453 ON CONFLICT (number) DO UPDATE SET is_drop = True""",
1454 "%s" % asn,
1455 "Spamhaus ASN-DROP list",
1456 True
1457 )
1458
1459 @staticmethod
1460 def _parse_bool(block, key):
1461 val = block.get(key)
1462
1463 		# There is no point in proceeding when we got None
1464 if val is None:
1465 return
1466
1467 # Convert to lowercase
1468 val = val.lower()
1469
1470 # True
1471 if val in ("yes", "1"):
1472 return True
1473
1474 # False
1475 if val in ("no", "0"):
1476 return False
1477
1478 # Default to None
1479 return None
1480
1481 def handle_import_countries(self, ns):
1482 with self.db.transaction():
1483 # Drop all data that we have
1484 self.db.execute("TRUNCATE TABLE countries")
1485
1486 for file in ns.file:
1487 for line in file:
1488 line = line.rstrip()
1489
1490 # Ignore any comments
1491 if line.startswith("#"):
1492 continue
1493
1494 try:
1495 country_code, continent_code, name = line.split(maxsplit=2)
1496 				except ValueError:
1497 log.warning("Could not parse line: %s" % line)
1498 continue
1499
1500 self.db.execute("INSERT INTO countries(country_code, name, continent_code) \
1501 VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code)
1502
1503
1504 def split_line(line):
1505 key, colon, val = line.partition(":")
1506
1507 # Strip any excess space
1508 key = key.strip()
1509 val = val.strip()
1510
1511 return key, val
1512
1513 def main():
1514 # Run the command line interface
1515 c = CLI()
1516 c.run()
1517
1518 main()