]> git.ipfire.org Git - location/libloc.git/blob - src/python/location-importer.in
Implement an additional flag for hostile networks safe to drop
[location/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import socket
26 import sys
27 import telnetlib
28
29 # Load our location module
30 import location
31 import location.database
32 import location.importer
33 from location.i18n import _
34
# Initialise logging: everything in this script logs through the
# "location.importer" logger, which passes its records on to its ancestors
log = logging.getLogger("location.importer")
log.propagate = True
38
39 class CLI(object):
def parse_cli(self):
    """
    Build the argument parser, parse the command line and return the
    resulting namespace.

    Every sub-command stores its handler in the "func" attribute of the
    namespace. If no sub-command was given, the usage is printed and the
    process exits with code 2.
    """
    cli = argparse.ArgumentParser(
        description=_("Location Importer Command Line Interface"),
    )
    commands = cli.add_subparsers()

    # Global configuration flags
    cli.add_argument("--debug", action="store_true",
        help=_("Enable debug output"))
    cli.add_argument("--quiet", action="store_true",
        help=_("Enable quiet mode"))

    # version
    cli.add_argument("--version", action="version",
        version="%(prog)s @VERSION@")

    # Database (all four settings are mandatory)
    database_options = (
        ("--database-host", _("Database Hostname"), _("HOST")),
        ("--database-name", _("Database Name"), _("NAME")),
        ("--database-username", _("Database Username"), _("USERNAME")),
        ("--database-password", _("Database Password"), _("PASSWORD")),
    )
    for option, text, placeholder in database_options:
        cli.add_argument(option, required=True, help=text, metavar=placeholder)

    # Write Database
    write_cmd = commands.add_parser("write", help=_("Write database to file"))
    write_cmd.set_defaults(func=self.handle_write)
    write_cmd.add_argument("file", nargs=1, help=_("Database File"))
    write_cmd.add_argument("--signing-key", nargs="?", type=open,
        help=_("Signing Key"))
    write_cmd.add_argument("--backup-signing-key", nargs="?", type=open,
        help=_("Backup Signing Key"))
    write_cmd.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
    write_cmd.add_argument("--description", nargs="?", help=_("Sets a description"))
    write_cmd.add_argument("--license", nargs="?", help=_("Sets the license"))
    write_cmd.add_argument("--version", type=int, help=_("Database Format Version"))

    # Update WHOIS
    whois_cmd = commands.add_parser("update-whois",
        help=_("Update WHOIS Information"))
    whois_cmd.set_defaults(func=self.handle_update_whois)

    # Update announcements
    announcements_cmd = commands.add_parser("update-announcements",
        help=_("Update BGP Annoucements"))
    announcements_cmd.set_defaults(func=self.handle_update_announcements)
    announcements_cmd.add_argument("server", nargs=1,
        help=_("Route Server to connect to"), metavar=_("SERVER"))

    # Update overrides
    overrides_cmd = commands.add_parser("update-overrides",
        help=_("Update overrides"),
    )
    overrides_cmd.add_argument(
        "files", nargs="+", help=_("Files to import"),
    )
    overrides_cmd.set_defaults(func=self.handle_update_overrides)

    # Import countries
    countries_cmd = commands.add_parser("import-countries",
        help=_("Import countries"),
    )
    countries_cmd.add_argument("file", nargs=1, type=argparse.FileType("r"),
        help=_("File to import"))
    countries_cmd.set_defaults(func=self.handle_import_countries)

    args = cli.parse_args()

    # Configure logging (--debug wins over --quiet)
    if args.debug:
        location.logger.set_level(logging.DEBUG)
    elif args.quiet:
        location.logger.set_level(logging.WARNING)

    # Print usage if no action was given
    if "func" not in args:
        cli.print_usage()
        sys.exit(2)

    return args
119
def run(self):
    """
    Parse the command line, connect to the database and dispatch to the
    handler of the selected sub-command.

    This never returns: the process exits with whatever the handler
    returned, or with 0 if it returned nothing (or any other false value).
    """
    args = self.parse_cli()

    # The handlers reach the database through self.db
    self.db = self._setup_database(args)

    sys.exit(args.func(args) or 0)
136
def _setup_database(self, ns):
    """
    Connect to the database described by the --database-* options in
    *ns*, make sure that all tables, columns and indexes used by the
    importer exist, and return the connection.
    """
    schema = """
        -- announcements
        CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
            first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
            last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
        CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
        CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
        CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

        -- autnums
        CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
        ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text NOT NULL;
        CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

        -- countries
        CREATE TABLE IF NOT EXISTS countries(
            country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
        CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

        -- networks
        CREATE TABLE IF NOT EXISTS networks(network inet, country text);
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL;
        CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
        CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
        CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

        -- overrides
        CREATE TABLE IF NOT EXISTS autnum_overrides(
            number bigint NOT NULL,
            name text,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
            ON autnum_overrides(number);
        ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;

        CREATE TABLE IF NOT EXISTS network_overrides(
            network inet NOT NULL,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
            ON network_overrides(network);
        CREATE INDEX IF NOT EXISTS network_overrides_search
            ON network_overrides USING GIST(network inet_ops);
        ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
    """

    # Connect to database
    connection = location.database.Connection(
        host=ns.database_host,
        database=ns.database_name,
        user=ns.database_username,
        password=ns.database_password,
    )

    # Apply the whole schema in a single transaction
    with connection.transaction():
        connection.execute(schema)

    return connection
203
def handle_write(self, ns):
    """
    Compile a database in libloc format from the content of the SQL
    database and write it to every file named in ns.file.

    Autonomous Systems, networks (with country, ASN and flags, each taken
    from the most specific override where one exists) and countries are
    added to the writer in that order.
    """
    # Select all ASes with a name, preferring an overridden name
    autnum_query = """
        SELECT
            autnums.number AS number,
            COALESCE(
                (SELECT overrides.name FROM autnum_overrides overrides
                    WHERE overrides.number = autnums.number),
                autnums.name
            ) AS name
            FROM autnums
            WHERE name <> %s ORDER BY number
        """

    # Select all known networks
    network_query = """
        -- Return a list of those networks enriched with all
        -- other information that we store in the database
        SELECT
            DISTINCT ON (network)
            network,
            autnum,

            -- Country
            COALESCE(
                (
                    SELECT country FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT country FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                networks.country
            ) AS country,

            -- Flags
            COALESCE(
                (
                    SELECT is_anonymous_proxy FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anonymous_proxy FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anonymous_proxy,
            COALESCE(
                (
                    SELECT is_satellite_provider FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_satellite_provider FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_satellite_provider,
            COALESCE(
                (
                    SELECT is_anycast FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anycast FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anycast,
            COALESCE(
                (
                    SELECT is_drop FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_drop FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_drop
        FROM (
            SELECT
                known_networks.network AS network,
                announcements.autnum AS autnum,
                networks.country AS country,

                -- Must be part of returned values for ORDER BY clause
                masklen(announcements.network) AS sort_a,
                masklen(networks.network) AS sort_b
            FROM (
                    SELECT network FROM announcements
                UNION ALL
                    SELECT network FROM networks
                UNION ALL
                    SELECT network FROM network_overrides
                ) known_networks
            LEFT JOIN
                announcements ON known_networks.network <<= announcements.network
            LEFT JOIN
                networks ON known_networks.network <<= networks.network
            ORDER BY
                known_networks.network,
                sort_a DESC,
                sort_b DESC
        ) networks
    """

    # Allocate a writer
    writer = location.Writer(ns.signing_key, ns.backup_signing_key)

    # Set all metadata that was given on the command line
    if ns.vendor:
        writer.vendor = ns.vendor

    if ns.description:
        writer.description = ns.description

    if ns.license:
        writer.license = ns.license

    # Add all Autonomous Systems
    log.info("Writing Autonomous Systems...")

    for entry in self.db.query(autnum_query, ""):
        autnum = writer.add_as(entry.number)
        autnum.name = entry.name

    # Add all networks
    log.info("Writing networks...")

    for entry in self.db.query(network_query):
        network = writer.add_network(entry.network)

        # Save country
        if entry.country:
            network.country_code = entry.country

        # Save ASN
        if entry.autnum:
            network.asn = entry.autnum

        # Set flags
        if entry.is_anonymous_proxy:
            network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

        if entry.is_satellite_provider:
            network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

        if entry.is_anycast:
            network.set_flag(location.NETWORK_FLAG_ANYCAST)

        if entry.is_drop:
            network.set_flag(location.NETWORK_FLAG_DROP)

    # Add all countries
    log.info("Writing countries...")

    for entry in self.db.query("SELECT * FROM countries ORDER BY country_code"):
        country = writer.add_country(entry.country_code)
        country.continent_code = entry.continent_code
        country.name = entry.name

    # Write everything to file
    log.info("Writing database to file...")
    for path in ns.file:
        writer.write(path)
385
def handle_update_whois(self, ns):
    """
    Re-import all network and AS information from the configured WHOIS
    and "extended" sources.

    The WHOIS dumps are parsed into temporary tables first. From those,
    the largest networks of each address family are copied into the
    networks table, followed by more specific networks that either have
    no parent yet or whose country differs from that of their closest
    parent. Finally the extended sources are imported, one transaction
    per downloaded file.
    """
    downloader = location.importer.Downloader()

    # Download all sources
    with self.db.transaction():
        # Create some temporary tables to store parsed data
        self.db.execute("""
            CREATE TEMPORARY TABLE _autnums(number integer, organization text, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

            CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

            CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
            CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
        """)

        # Remove all previously imported content
        self.db.execute("""
            TRUNCATE TABLE networks;
        """)

        # Fetch all valid country codes to check parsed networks against.
        # This is a set because it is only ever used for membership tests
        # (and a truth test), once for every parsed object.
        rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
        validcountries = {row.country_code for row in rows}

        for source_key in location.importer.WHOIS_SOURCES:
            for single_url in location.importer.WHOIS_SOURCES[source_key]:
                with downloader.request(single_url, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block, source_key, validcountries)

        # Process all parsed networks from every RIR we happen to have access to,
        # insert the largest network chunks into the networks table immediately...
        families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")

        for family in (row.family for row in families):
            smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)

            self.db.execute("""
                INSERT INTO networks(network, country, original_countries, source)
                SELECT network, country, original_countries, source FROM _rirdata
                    WHERE masklen(network) = %s AND family(network) = %s""",
                smallest.prefix, family,
            )

            # ... determine any other prefixes for this network family, ...
            prefixes = self.db.query("""
                SELECT DISTINCT masklen(network) AS prefix FROM _rirdata
                    WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1""",
                family,
            )

            # ... and insert networks with this prefix in case they provide additional
            # information (i. e. subnet of a larger chunk with a different country)
            for prefix in (row.prefix for row in prefixes):
                self.db.execute("""
                    WITH candidates AS (
                        SELECT
                            _rirdata.network,
                            _rirdata.country,
                            _rirdata.original_countries,
                            _rirdata.source
                        FROM
                            _rirdata
                        WHERE
                            family(_rirdata.network) = %s
                        AND
                            masklen(_rirdata.network) = %s
                    ),
                    filtered AS (
                        SELECT
                            DISTINCT ON (c.network)
                            c.network,
                            c.country,
                            c.original_countries,
                            c.source,
                            masklen(networks.network),
                            networks.country AS parent_country
                        FROM
                            candidates c
                        LEFT JOIN
                            networks
                        ON
                            c.network << networks.network
                        ORDER BY
                            c.network,
                            masklen(networks.network) DESC NULLS LAST
                    )
                    INSERT INTO
                        networks(network, country, original_countries, source)
                    SELECT
                        network,
                        country,
                        original_countries,
                        source
                    FROM
                        filtered
                    WHERE
                        parent_country IS NULL
                    OR
                        country <> parent_country
                    ON CONFLICT DO NOTHING""",
                    family, prefix,
                )

        # Name every AS after the organisation it belongs to
        self.db.execute("""
            INSERT INTO autnums(number, name, source)
                SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
                    JOIN _organizations ON _autnums.organization = _organizations.handle
            ON CONFLICT (number) DO UPDATE SET name = excluded.name;
        """)

    # Download all extended sources
    for source_key in location.importer.EXTENDED_SOURCES:
        for single_url in location.importer.EXTENDED_SOURCES[source_key]:
            with self.db.transaction():
                # Download data
                with downloader.request(single_url) as f:
                    for line in f:
                        self._parse_line(line, source_key, validcountries)
507
508 def _check_parsed_network(self, network):
509 """
510 Assistive function to detect and subsequently sort out parsed
511 networks from RIR data (both Whois and so-called "extended sources"),
512 which are or have...
513
514 (a) not globally routable (RFC 1918 space, et al.)
515 (b) covering a too large chunk of the IP address space (prefix length
516 is < 7 for IPv4 networks, and < 10 for IPv6)
517 (c) "0.0.0.0" or "::" as a network address
518 (d) are too small for being publicly announced (we have decided not to
519 process them at the moment, as they significantly enlarge our
520 database without providing very helpful additional information)
521
522 This unfortunately is necessary due to brain-dead clutter across
523 various RIR databases, causing mismatches and eventually disruptions.
524
525 We will return False in case a network is not suitable for adding
526 it to our database, and True otherwise.
527 """
528
529 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
530 return False
531
532 if not network.is_global:
533 log.debug("Skipping non-globally routable network: %s" % network)
534 return False
535
536 if network.version == 4:
537 if network.prefixlen < 7:
538 log.debug("Skipping too big IP chunk: %s" % network)
539 return False
540
541 if network.prefixlen > 24:
542 log.debug("Skipping network too small to be publicly announced: %s" % network)
543 return False
544
545 if str(network.network_address) == "0.0.0.0":
546 log.debug("Skipping network based on 0.0.0.0: %s" % network)
547 return False
548
549 elif network.version == 6:
550 if network.prefixlen < 10:
551 log.debug("Skipping too big IP chunk: %s" % network)
552 return False
553
554 if network.prefixlen > 48:
555 log.debug("Skipping network too small to be publicly announced: %s" % network)
556 return False
557
558 if str(network.network_address) == "::":
559 log.debug("Skipping network based on '::': %s" % network)
560 return False
561
562 else:
563 # This should not happen...
564 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
565 return False
566
567 # In case we have made it here, the network is considered to
568 # be suitable for libloc consumption...
569 return True
570
571 def _parse_block(self, block, source_key, validcountries = None):
572 # Get first line to find out what type of block this is
573 line = block[0]
574
575 # aut-num
576 if line.startswith("aut-num:"):
577 return self._parse_autnum_block(block, source_key)
578
579 # inetnum
580 if line.startswith("inet6num:") or line.startswith("inetnum:"):
581 return self._parse_inetnum_block(block, source_key, validcountries)
582
583 # organisation
584 elif line.startswith("organisation:"):
585 return self._parse_org_block(block, source_key)
586
def _parse_autnum_block(self, block, source_key):
    """
    Parse an "aut-num" WHOIS block and store the AS number together with
    the (upper-cased) handle of its organisation in the temporary
    _autnums table.

    Objects without a parsable AS number are skipped. They used to be
    inserted with a NULL number as soon as they carried an "org"
    attribute.
    """
    autnum = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "aut-num":
            m = re.match(r"^(AS|as)(\d+)", val)
            if m:
                autnum["asn"] = m.group(2)

        elif key == "org":
            autnum[key] = val.upper()

    # Skip empty objects and objects without an AS number
    if "asn" not in autnum:
        return

    # Insert into database
    self.db.execute("""
        INSERT INTO _autnums(number, organization, source)
            VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET
                organization = excluded.organization""",
        autnum.get("asn"), autnum.get("org"), source_key,
    )
611
def _parse_inetnum_block(self, block, source_key, validcountries = None):
    """
    Parse an "inetnum" or "inet6num" WHOIS block and store its networks
    in the temporary _rirdata table.

    block is the sequence of lines of the object, source_key names the
    source the object came from, and validcountries is a collection of
    known country codes (no country check is done if it is empty or
    None).

    The object is dropped if it is a placeholder for address space not
    managed by the RIR, if its address range cannot be parsed, if it has
    no country, or if one of its countries is unknown. Of the remaining
    objects, every network that passes _check_parsed_network() is
    stored with the first country listed.
    """
    log.debug("Parsing inetnum block:")

    inetnum = {}
    for line in block:
        log.debug(line)

        # Split line
        key, val = split_line(line)

        # Filter any inetnum records which are only referring to IP space
        # not managed by that specific RIR...
        if key == "netname":
            if re.match(r"(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK)", val.strip()):
                log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
                return

        if key == "inetnum":
            start_address, delim, end_address = val.partition("-")

            # Strip any excess space
            start_address, end_address = start_address.rstrip(), end_address.strip()

            # Convert to IP addresses and summarise the range. The latter
            # raises ValueError for a reversed range and TypeError if both
            # ends are of different address families.
            try:
                start_address = ipaddress.ip_address(start_address)
                end_address = ipaddress.ip_address(end_address)

                inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
            except (TypeError, ValueError):
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "inet6num":
            try:
                inetnum[key] = [ipaddress.ip_network(val, strict=False)]
            except ValueError:
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "country":
            val = val.upper()

            # Catch RIR data objects with more than one country code,
            # but keep this list distinct...
            countries = inetnum.setdefault(key, [])
            if val not in countries:
                countries.append(val)

    # Skip empty objects
    if not inetnum or "country" not in inetnum:
        return

    countries = inetnum["country"]
    networks = inetnum.get("inet6num") or inetnum.get("inetnum") or []

    # Prepare skipping objects with unknown country codes...
    if validcountries:
        invalidcountries = [country for country in countries if country not in validcountries]
    else:
        invalidcountries = []

    # Iterate through all networks enumerated from above, check them for plausibility and insert
    # them into the database, if _check_parsed_network() succeeded
    for single_network in networks:
        if not self._check_parsed_network(single_network):
            continue

        # Skip objects with unknown country codes. This is only reported for
        # objects that have a valid network to avoid log spam, and only once
        # per object as all of its networks share the same countries.
        if invalidcountries:
            log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
                (invalidcountries, countries, networks))
            break

        # Everything is fine here, run INSERT statement...
        self.db.execute("""
            INSERT INTO _rirdata(network, country, original_countries, source)
                VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country""",
            "%s" % single_network, countries[0], countries, source_key,
        )
683
def _parse_org_block(self, block, source_key):
    """
    Parse an "organisation" WHOIS block and store the (upper-cased)
    handle of the organisation together with its name in the temporary
    _organizations table.

    Objects that lack either attribute are skipped: a missing name would
    violate the NOT NULL constraint of the table and fail the import,
    and a row without a handle could never be joined to an AS.
    """
    org = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "organisation":
            org[key] = val.upper()
        elif key == "org-name":
            org[key] = val

    handle = org.get("organisation")
    name = org.get("org-name")

    # Skip empty and incomplete objects
    if handle is None or name is None:
        return

    self.db.execute("""
        INSERT INTO _organizations(handle, name, source)
            VALUES(%s, %s, %s) ON CONFLICT (handle) DO
                UPDATE SET name = excluded.name""",
        handle, name, source_key,
    )
704
705 def _parse_line(self, line, source_key, validcountries = None):
706 # Skip version line
707 if line.startswith("2"):
708 return
709
710 # Skip comments
711 if line.startswith("#"):
712 return
713
714 try:
715 registry, country_code, type, line = line.split("|", 3)
716 except:
717 log.warning("Could not parse line: %s" % line)
718 return
719
720 # Skip any lines that are for stats only or do not have a country
721 # code at all (avoids log spam below)
722 if not country_code or country_code == '*':
723 return
724
725 # Skip objects with unknown country codes
726 if validcountries and country_code not in validcountries:
727 log.warning("Skipping line with bogus country '%s': %s" % \
728 (country_code, line))
729 return
730
731 if type in ("ipv6", "ipv4"):
732 return self._parse_ip_line(country_code, type, line, source_key)
733
734 def _parse_ip_line(self, country, type, line, source_key):
735 try:
736 address, prefix, date, status, organization = line.split("|")
737 except ValueError:
738 organization = None
739
740 # Try parsing the line without organization
741 try:
742 address, prefix, date, status = line.split("|")
743 except ValueError:
744 log.warning("Unhandled line format: %s" % line)
745 return
746
747 # Skip anything that isn't properly assigned
748 if not status in ("assigned", "allocated"):
749 return
750
751 # Cast prefix into an integer
752 try:
753 prefix = int(prefix)
754 except:
755 log.warning("Invalid prefix: %s" % prefix)
756 return
757
758 # Fix prefix length for IPv4
759 if type == "ipv4":
760 prefix = 32 - int(math.log(prefix, 2))
761
762 # Try to parse the address
763 try:
764 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
765 except ValueError:
766 log.warning("Invalid IP address: %s" % address)
767 return
768
769 if not self._check_parsed_network(network):
770 return
771
772 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
773 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
774 UPDATE SET country = excluded.country",
775 "%s" % network, country, [country], source_key,
776 )
777
def handle_update_announcements(self, ns):
    """
    Update the announcements table from a route server and purge
    everything from it that we never want to keep.

    If the server (ns.server[0]) starts with a slash, it is taken as the
    path of a Bird control socket; otherwise it is the host name of a
    router to be queried over telnet.

    Returns whatever the import returned, so that an error code makes it
    to the exit code of the process. The purge is run in either case.
    """
    server = ns.server[0]

    with self.db.transaction():
        if server.startswith("/"):
            ret = self._handle_update_announcements_from_bird(server)
        else:
            ret = self._handle_update_announcements_from_telnet(server)

        # Purge anything we never want here
        self.db.execute("""
            -- Delete default routes
            DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

            -- Delete anything that is not global unicast address space
            DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

            -- DELETE "current network" address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

            -- DELETE local loopback address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

            -- DELETE RFC 1918 address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

            -- DELETE test, benchmark and documentation address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

            -- DELETE CGNAT address space (RFC 6598)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

            -- DELETE link local address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

            -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
            DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

            -- DELETE multicast and reserved address space
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
            DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

            -- Delete networks that are too small to be in the global routing table
            DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
            DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

            -- Delete any non-public or reserved ASNs
            DELETE FROM announcements WHERE NOT (
                (autnum >= 1 AND autnum <= 23455)
                OR
                (autnum >= 23457 AND autnum <= 64495)
                OR
                (autnum >= 131072 AND autnum <= 4199999999)
            );

            -- Delete everything that we have not seen for 14 days
            DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
        """)

    return ret
843
def _handle_update_announcements_from_bird(self, server):
    """
    Read the routing table from the Bird control socket at the path
    *server* and insert or refresh an announcement for every route that
    could be parsed. Lines that do not look like a route are logged at
    debug level and skipped.
    """
    # Pre-compile the regular expression for faster searching.
    # It captures the network at the beginning of the line (after one
    # whitespace character) and whatever follows "[AS" at the end of the
    # line, minus the last character before the closing bracket.
    # This is a raw literal so that \s and \[ reach the regex engine
    # instead of being invalid string escape sequences.
    route = re.compile(rb"^\s(.+?)\s+.+?\[AS(.*?).\]$")

    log.info("Requesting routing table from Bird (%s)" % server)

    # Send command to list all routes
    for line in self._bird_cmd(server, "show route"):
        m = route.match(line)
        if not m:
            log.debug("Could not parse line: %s" % line.decode())
            continue

        # Fetch the extracted network and ASN
        network, autnum = m.groups()

        # Insert it into the database
        self.db.execute("""
            INSERT INTO announcements(network, autnum)
                VALUES(%s, %s) ON CONFLICT (network) DO
                    UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP""",
            network.decode(), autnum.decode(),
        )
866
def _handle_update_announcements_from_telnet(self, server):
    """
    Read the IPv6 and IPv4 BGP tables from the telnet console of
    *server* ("show bgp <protocol> unicast") and insert or refresh an
    announcement for every route that could be parsed.

    Returns 1 if no console prompt showed up within 30 seconds and
    None otherwise.
    """
    # Pre-compile regular expression for routes. It captures the network
    # and the last number before the trailing " i" of an entry.
    # This is a raw literal so that \*, \s, \> and \d reach the regex
    # engine instead of being invalid string escape sequences.
    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

    with telnetlib.Telnet(server) as t:
        # Wait for console greeting
        greeting = t.read_until(b"> ", timeout=30)
        if not greeting:
            log.error("Could not get a console prompt")
            return 1

        # Disable pagination
        t.write(b"terminal length 0\n")

        # Wait for the prompt to return
        t.read_until(b"> ")

        # Fetch the routing tables
        for protocol in ("ipv6", "ipv4"):
            log.info("Requesting %s routing table" % protocol)

            # Request the full unicast routing table
            t.write(b"show bgp %s unicast\n" % protocol.encode())

            # Read entire header which ends with "Path"
            t.read_until(b"Path\r\n")

            while True:
                # Try reading a full entry
                # Those might be broken across multiple lines but ends with i
                line = t.read_until(b"i\r\n", timeout=5)

                # Nothing arrived before the timeout, so the table is
                # taken to be complete
                if not line:
                    break

                # Try finding a route in here
                m = route.match(line)
                if not m:
                    continue

                network, autnum = m.groups()

                # Convert network to string
                network = network.decode()

                # Append /24 for IPv4 addresses
                if "/" not in network and ":" not in network:
                    network = "%s/24" % network

                # Convert AS number to integer
                autnum = int(autnum)

                log.info("Found announcement for %s by %s" % (network, autnum))

                self.db.execute("""
                    INSERT INTO announcements(network, autnum)
                        VALUES(%s, %s) ON CONFLICT (network) DO
                            UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP""",
                    network, autnum,
                )

            log.info("Finished reading the %s routing table" % protocol)
932
933 def _bird_cmd(self, socket_path, command):
934 # Connect to the socket
935 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
936 s.connect(socket_path)
937
938 # Allocate some buffer
939 buffer = b""
940
941 # Send the command
942 s.send(b"%s\n" % command.encode())
943
944 while True:
945 # Fill up the buffer
946 buffer += s.recv(4096)
947
948 while True:
949 # Search for the next newline
950 pos = buffer.find(b"\n")
951
952 # If we cannot find one, we go back and read more data
953 if pos <= 0:
954 break
955
956 # Cut after the newline character
957 pos += 1
958
959 # Split the line we want and keep the rest in buffer
960 line, buffer = buffer[:pos], buffer[pos:]
961
962 # Look for the end-of-output indicator
963 if line == b"0000 \n":
964 return
965
966 # Otherwise return the line
967 yield line
968
def handle_update_overrides(self, ns):
    """
    Replace all overrides by the content of the files in ns.files.

    Both override tables are emptied first. Then every "net" and
    "aut-num" block found in the files is inserted; if the same network
    or AS shows up again, the first occurrence wins. Blocks of any other
    type are reported and skipped.
    """
    insert_network = """
        INSERT INTO network_overrides(
            network,
            country,
            is_anonymous_proxy,
            is_satellite_provider,
            is_anycast,
            is_drop
        ) VALUES (%s, %s, %s, %s, %s, %s)
        ON CONFLICT (network) DO NOTHING"""

    insert_autnum = """
        INSERT INTO autnum_overrides(
            number,
            name,
            country,
            is_anonymous_proxy,
            is_satellite_provider,
            is_anycast,
            is_drop
        ) VALUES(%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT DO NOTHING"""

    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("""
            TRUNCATE TABLE autnum_overrides;
            TRUNCATE TABLE network_overrides;
        """)

        for path in ns.files:
            log.info("Reading %s..." % path)

            with open(path, "rb") as f:
                for kind, block in location.importer.read_blocks(f):
                    if kind == "net":
                        network = block.get("net")

                        # Try to parse and normalise the network
                        try:
                            network = ipaddress.ip_network(network, strict=False)
                        except ValueError as e:
                            log.warning("Invalid IP network: %s: %s" % (network, e))
                            continue

                        # Prevent that we overwrite all networks
                        if network.prefixlen == 0:
                            log.warning("Skipping %s: You cannot overwrite default" % network)
                            continue

                        self.db.execute(
                            insert_network,
                            "%s" % network,
                            block.get("country"),
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                            self._parse_bool(block, "drop"),
                        )

                    elif kind == "aut-num":
                        autnum = block.get("aut-num")

                        # Check if AS number begins with "AS"
                        if not autnum.startswith("AS"):
                            log.warning("Invalid AS number: %s" % autnum)
                            continue

                        self.db.execute(
                            insert_autnum,
                            # Strip "AS"
                            autnum[2:],
                            block.get("name"),
                            block.get("country"),
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                            self._parse_bool(block, "drop"),
                        )

                    else:
                        log.warning("Unsupported type: %s" % kind)
1047
1048 @staticmethod
1049 def _parse_bool(block, key):
1050 val = block.get(key)
1051
1052 # There is no point to proceed when we got None
1053 if val is None:
1054 return
1055
1056 # Convert to lowercase
1057 val = val.lower()
1058
1059 # True
1060 if val in ("yes", "1"):
1061 return True
1062
1063 # False
1064 if val in ("no", "0"):
1065 return False
1066
1067 # Default to None
1068 return None
1069
def handle_import_countries(self, ns):
    """
    Replace the content of the countries table by what is listed in the
    open files in ns.file.

    Every line is expected to hold a country code, a continent code and
    the name of the country, separated by whitespace (the name may
    contain further whitespace). Lines starting with "#" are ignored,
    and lines with fewer than three fields are reported and skipped.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("TRUNCATE TABLE countries")

        for file in ns.file:
            for line in file:
                line = line.rstrip()

                # Ignore any comments
                if line.startswith("#"):
                    continue

                # Unpacking raises ValueError if a field is missing
                try:
                    country_code, continent_code, name = line.split(maxsplit=2)
                except ValueError:
                    log.warning("Could not parse line: %s" % line)
                    continue

                self.db.execute("""
                    INSERT INTO countries(country_code, name, continent_code)
                        VALUES(%s, %s, %s) ON CONFLICT DO NOTHING""",
                    country_code, name, continent_code,
                )
1091
1092
def split_line(line):
    """
    Split a "key: value" line at its first colon and return key and
    value with surrounding whitespace removed. Without a colon, the
    whole line becomes the key and the value is empty.
    """
    key, _, val = line.partition(":")

    # Strip any excess space
    return key.strip(), val.strip()
1101
def main():
    """
    Run the command line interface.
    """
    cli = CLI()
    cli.run()

# Only start the importer when this file is executed as a script, not
# when it is merely imported
if __name__ == "__main__":
    main()