]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location-importer.in
location-importer.in: add source column for overrides as well
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import socket
26 import sys
27 import telnetlib
28
29 # Load our location module
30 import location
31 import location.database
32 import location.importer
33 from location.i18n import _
34
# Module-wide logger used by every importer command below
log = logging.getLogger("location.importer")
log.propagate = 1
38
39 class CLI(object):
def parse_cli(self):
    """
        Builds the argument parser including all subcommands, parses the
        command line and adjusts the log level according to --debug/--quiet.

        Returns the parsed namespace. If no subcommand was selected, the
        usage is printed and the process exits with code 2.
    """
    root = argparse.ArgumentParser(
        description=_("Location Importer Command Line Interface"),
    )
    commands = root.add_subparsers()

    # Options that are valid for every subcommand
    root.add_argument("--debug", action="store_true",
        help=_("Enable debug output"))
    root.add_argument("--quiet", action="store_true",
        help=_("Enable quiet mode"))
    root.add_argument("--version", action="version",
        version="%(prog)s @VERSION@")

    # Database credentials (all of them are mandatory)
    for flag, text, placeholder in (
        ("--database-host", _("Database Hostname"), _("HOST")),
        ("--database-name", _("Database Name"), _("NAME")),
        ("--database-username", _("Database Username"), _("USERNAME")),
        ("--database-password", _("Database Password"), _("PASSWORD")),
    ):
        root.add_argument(flag, required=True, help=text, metavar=placeholder)

    # write
    cmd_write = commands.add_parser("write", help=_("Write database to file"))
    cmd_write.set_defaults(func=self.handle_write)
    cmd_write.add_argument("file", nargs=1, help=_("Database File"))
    cmd_write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
    cmd_write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
    cmd_write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
    cmd_write.add_argument("--description", nargs="?", help=_("Sets a description"))
    cmd_write.add_argument("--license", nargs="?", help=_("Sets the license"))
    cmd_write.add_argument("--version", type=int, help=_("Database Format Version"))

    # update-whois
    cmd_whois = commands.add_parser("update-whois", help=_("Update WHOIS Information"))
    cmd_whois.set_defaults(func=self.handle_update_whois)

    # update-announcements
    cmd_announcements = commands.add_parser("update-announcements",
        help=_("Update BGP Annoucements"))
    cmd_announcements.set_defaults(func=self.handle_update_announcements)
    cmd_announcements.add_argument("server", nargs=1,
        help=_("Route Server to connect to"), metavar=_("SERVER"))

    # update-overrides
    cmd_overrides = commands.add_parser("update-overrides",
        help=_("Update overrides"),
    )
    cmd_overrides.add_argument(
        "files", nargs="+", help=_("Files to import"),
    )
    cmd_overrides.set_defaults(func=self.handle_update_overrides)

    # import-countries
    cmd_countries = commands.add_parser("import-countries",
        help=_("Import countries"),
    )
    cmd_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
        help=_("File to import"))
    cmd_countries.set_defaults(func=self.handle_import_countries)

    args = root.parse_args()

    # --debug wins over --quiet when both are given
    if args.debug:
        location.logger.set_level(logging.DEBUG)
    elif args.quiet:
        location.logger.set_level(logging.WARNING)

    # Without a subcommand there is no handler to call
    if "func" not in args:
        root.print_usage()
        sys.exit(2)

    return args
119
120 def run(self):
121 # Parse command line arguments
122 args = self.parse_cli()
123
124 # Initialise database
125 self.db = self._setup_database(args)
126
127 # Call function
128 ret = args.func(args)
129
130 # Return with exit code
131 if ret:
132 sys.exit(ret)
133
134 # Otherwise just exit
135 sys.exit(0)
136
def _setup_database(self, ns):
    """
        Connects to the database given on the command line and makes
        sure that all tables, columns and indexes used by the importer
        exist. Returns the connection.
    """
    # NOTE(review): "ADD COLUMN ... NOT NULL" without a DEFAULT is rejected
    # by PostgreSQL when the table already contains rows - confirm that
    # upgrading a populated autnums/networks table is handled elsewhere.
    schema = """
        -- announcements
        CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
            first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
            last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
        CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
        CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));
        CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops);

        -- autnums
        CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
        ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text NOT NULL;
        CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

        -- countries
        CREATE TABLE IF NOT EXISTS countries(
            country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL);
        CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code);

        -- networks
        CREATE TABLE IF NOT EXISTS networks(network inet, country text);
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[];
        ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text NOT NULL;
        CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
        CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network));
        CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

        -- overrides
        CREATE TABLE IF NOT EXISTS autnum_overrides(
            number bigint NOT NULL,
            name text,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
            ON autnum_overrides(number);
        ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text;
        ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;

        CREATE TABLE IF NOT EXISTS network_overrides(
            network inet NOT NULL,
            country text,
            is_anonymous_proxy boolean,
            is_satellite_provider boolean,
            is_anycast boolean
        );
        CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
            ON network_overrides(network);
        CREATE INDEX IF NOT EXISTS network_overrides_search
            ON network_overrides USING GIST(network inet_ops);
        ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text;
        ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean;
    """

    conn = location.database.Connection(
        host=ns.database_host, database=ns.database_name,
        user=ns.database_username, password=ns.database_password,
    )

    # Create or migrate the whole schema in a single transaction
    with conn.transaction():
        conn.execute(schema)

    return conn
205
def handle_write(self, ns):
    """
        Compiles a database in libloc format out of what is in the database

        Autonomous Systems, networks (with overrides applied) and
        countries are read from the SQL database, handed to a
        location.Writer and written to every file in ns.file.
    """
    # NOTE(review): the --version option registered for this command
    # (ns.version) is not read anywhere in this method - confirm whether
    # it is supposed to be passed on to the writer.
    writer = location.Writer(ns.signing_key, ns.backup_signing_key)

    # Copy over any metadata that was given on the command line
    for field in ("vendor", "description", "license"):
        value = getattr(ns, field)
        if value:
            setattr(writer, field, value)

    # Autonomous Systems: only those with a non-empty name; a name
    # from autnum_overrides wins over the imported one
    log.info("Writing Autonomous Systems...")

    autnums = self.db.query("""
        SELECT
            autnums.number AS number,
            COALESCE(
                (SELECT overrides.name FROM autnum_overrides overrides
                    WHERE overrides.number = autnums.number),
                autnums.name
            ) AS name
            FROM autnums
            WHERE name <> %s ORDER BY number
        """, "")

    for row in autnums:
        writer.add_as(row.number).name = row.name

    # Networks
    log.info("Writing networks...")

    # Result column -> flag that is set on the network if the column is true
    flags = (
        ("is_anonymous_proxy", location.NETWORK_FLAG_ANONYMOUS_PROXY),
        ("is_satellite_provider", location.NETWORK_FLAG_SATELLITE_PROVIDER),
        ("is_anycast", location.NETWORK_FLAG_ANYCAST),
        ("is_drop", location.NETWORK_FLAG_DROP),
    )

    networks = self.db.query("""
        -- Return a list of those networks enriched with all
        -- other information that we store in the database
        SELECT
            DISTINCT ON (network)
            network,
            autnum,

            -- Country
            COALESCE(
                (
                    SELECT country FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT country FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                networks.country
            ) AS country,

            -- Flags
            COALESCE(
                (
                    SELECT is_anonymous_proxy FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anonymous_proxy FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anonymous_proxy,
            COALESCE(
                (
                    SELECT is_satellite_provider FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_satellite_provider FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_satellite_provider,
            COALESCE(
                (
                    SELECT is_anycast FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_anycast FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_anycast,
            COALESCE(
                (
                    SELECT is_drop FROM network_overrides overrides
                        WHERE networks.network <<= overrides.network
                        ORDER BY masklen(overrides.network) DESC
                        LIMIT 1
                ),
                (
                    SELECT is_drop FROM autnum_overrides overrides
                        WHERE networks.autnum = overrides.number
                ),
                FALSE
            ) AS is_drop
        FROM (
            SELECT
                known_networks.network AS network,
                announcements.autnum AS autnum,
                networks.country AS country,

                -- Must be part of returned values for ORDER BY clause
                masklen(announcements.network) AS sort_a,
                masklen(networks.network) AS sort_b
            FROM (
                    SELECT network FROM announcements
                UNION ALL
                    SELECT network FROM networks
                UNION ALL
                    SELECT network FROM network_overrides
                ) known_networks
            LEFT JOIN
                announcements ON known_networks.network <<= announcements.network
            LEFT JOIN
                networks ON known_networks.network <<= networks.network
            ORDER BY
                known_networks.network,
                sort_a DESC,
                sort_b DESC
        ) networks
    """)

    for row in networks:
        network = writer.add_network(row.network)

        # Country and ASN are only set when known
        if row.country:
            network.country_code = row.country

        if row.autnum:
            network.asn = row.autnum

        for column, flag in flags:
            if getattr(row, column):
                network.set_flag(flag)

    # Countries
    log.info("Writing countries...")

    for row in self.db.query("SELECT * FROM countries ORDER BY country_code"):
        country = writer.add_country(row.country_code)
        country.continent_code = row.continent_code
        country.name = row.name

    # Finally, write the result to all requested files
    log.info("Writing database to file...")
    for path in ns.file:
        writer.write(path)
387
def handle_update_whois(self, ns):
    """
        Re-imports all network and AS information from the RIRs.

        The WHOIS sources are parsed into temporary tables and merged
        into the networks and autnums tables within one transaction.
        Afterwards the extended sources are imported (one transaction
        per URL), followed by the AS names published by ARIN.
    """
    downloader = location.importer.Downloader()

    # Inserts candidates of one address family and prefix length unless
    # the closest network that already contains them has the same country
    insert_more_specifics = """
        WITH candidates AS (
            SELECT
                _rirdata.network,
                _rirdata.country,
                _rirdata.original_countries,
                _rirdata.source
            FROM
                _rirdata
            WHERE
                family(_rirdata.network) = %s
            AND
                masklen(_rirdata.network) = %s
        ),
        filtered AS (
            SELECT
                DISTINCT ON (c.network)
                c.network,
                c.country,
                c.original_countries,
                c.source,
                masklen(networks.network),
                networks.country AS parent_country
            FROM
                candidates c
            LEFT JOIN
                networks
            ON
                c.network << networks.network
            ORDER BY
                c.network,
                masklen(networks.network) DESC NULLS LAST
        )
        INSERT INTO
            networks(network, country, original_countries, source)
        SELECT
            network,
            country,
            original_countries,
            source
        FROM
            filtered
        WHERE
            parent_country IS NULL
        OR
            country <> parent_country
        ON CONFLICT DO NOTHING"""

    with self.db.transaction():
        # Scratch tables for the parsed data; dropped again on commit
        self.db.execute("""
            CREATE TEMPORARY TABLE _autnums(number integer, organization text, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

            CREATE TEMPORARY TABLE _organizations(handle text, name text NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);

            CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
                ON COMMIT DROP;
            CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
            CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
        """)

        # Start from scratch
        self.db.execute("""
            TRUNCATE TABLE networks;
        """)

        # All known country codes; parsed networks are checked against them
        validcountries = [
            row.country_code for row in
                self.db.query("SELECT * FROM countries ORDER BY country_code")
        ]

        # Download and parse all WHOIS sources
        for source_key in location.importer.WHOIS_SOURCES:
            for single_url in location.importer.WHOIS_SOURCES[source_key]:
                with downloader.request(single_url, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block, source_key, validcountries)

        # Merge the parsed networks family by family: the largest
        # networks go into the networks table straight away ...
        families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")

        for family_row in families:
            family = family_row.family

            smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)

            self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
                SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)

            # ... followed by all remaining prefix lengths in ascending order ...
            prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
                WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)

            # ... which are only inserted if they add information (i.e. they
            # are a subnet of a larger chunk with a different country)
            for prefix_row in prefixes:
                self.db.execute(insert_more_specifics, family, prefix_row.prefix)

        # Name all Autonomous Systems after the organisation they belong to
        self.db.execute("""
            INSERT INTO autnums(number, name, source)
                SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums
                    JOIN _organizations ON _autnums.organization = _organizations.handle
            ON CONFLICT (number) DO UPDATE SET name = excluded.name;
        """)

    # Download and parse all extended sources
    for source_key in location.importer.EXTENDED_SOURCES:
        for single_url in location.importer.EXTENDED_SOURCES[source_key]:
            with self.db.transaction():
                with downloader.request(single_url) as f:
                    for line in f:
                        self._parse_line(line, source_key, validcountries)

    # Download and import (technical) AS names from ARIN
    self._import_as_names_from_arin()
512
513 def _check_parsed_network(self, network):
514 """
515 Assistive function to detect and subsequently sort out parsed
516 networks from RIR data (both Whois and so-called "extended sources"),
517 which are or have...
518
519 (a) not globally routable (RFC 1918 space, et al.)
520 (b) covering a too large chunk of the IP address space (prefix length
521 is < 7 for IPv4 networks, and < 10 for IPv6)
522 (c) "0.0.0.0" or "::" as a network address
523 (d) are too small for being publicly announced (we have decided not to
524 process them at the moment, as they significantly enlarge our
525 database without providing very helpful additional information)
526
527 This unfortunately is necessary due to brain-dead clutter across
528 various RIR databases, causing mismatches and eventually disruptions.
529
530 We will return False in case a network is not suitable for adding
531 it to our database, and True otherwise.
532 """
533
534 if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
535 return False
536
537 if not network.is_global:
538 log.debug("Skipping non-globally routable network: %s" % network)
539 return False
540
541 if network.version == 4:
542 if network.prefixlen < 7:
543 log.debug("Skipping too big IP chunk: %s" % network)
544 return False
545
546 if network.prefixlen > 24:
547 log.debug("Skipping network too small to be publicly announced: %s" % network)
548 return False
549
550 if str(network.network_address) == "0.0.0.0":
551 log.debug("Skipping network based on 0.0.0.0: %s" % network)
552 return False
553
554 elif network.version == 6:
555 if network.prefixlen < 10:
556 log.debug("Skipping too big IP chunk: %s" % network)
557 return False
558
559 if network.prefixlen > 48:
560 log.debug("Skipping network too small to be publicly announced: %s" % network)
561 return False
562
563 if str(network.network_address) == "::":
564 log.debug("Skipping network based on '::': %s" % network)
565 return False
566
567 else:
568 # This should not happen...
569 log.warning("Skipping network of unknown family, this should not happen: %s" % network)
570 return False
571
572 # In case we have made it here, the network is considered to
573 # be suitable for libloc consumption...
574 return True
575
576 def _parse_block(self, block, source_key, validcountries = None):
577 # Get first line to find out what type of block this is
578 line = block[0]
579
580 # aut-num
581 if line.startswith("aut-num:"):
582 return self._parse_autnum_block(block, source_key)
583
584 # inetnum
585 if line.startswith("inet6num:") or line.startswith("inetnum:"):
586 return self._parse_inetnum_block(block, source_key, validcountries)
587
588 # organisation
589 elif line.startswith("organisation:"):
590 return self._parse_org_block(block, source_key)
591
def _parse_autnum_block(self, block, source_key):
    """
        Parses an aut-num block and stores the AS number together with
        the handle of its organisation in the temporary _autnums table.

        block is the list of lines of the WHOIS object, source_key names
        the source the block was downloaded from.

        Blocks without a parsable AS number are skipped.
    """
    autnum = {}
    for line in block:
        # Split line
        key, val = split_line(line)

        if key == "aut-num":
            m = re.match(r"^(AS|as)(\d+)", val)
            if m:
                autnum["asn"] = m.group(2)

        elif key == "org":
            # Handles are stored in upper case (as in _parse_org_block)
            autnum[key] = val.upper()

    # Skip objects without an AS number. Without this check, a block that
    # only carried an "org" attribute would have been stored with a NULL
    # number, which never conflicts on the unique index.
    if "asn" not in autnum:
        return

    # Insert into database
    self.db.execute("INSERT INTO _autnums(number, organization, source) \
        VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
            organization = excluded.organization",
        autnum.get("asn"), autnum.get("org"), source_key,
    )
616
def _parse_inetnum_block(self, block, source_key, validcountries = None):
    """
        Parses an inetnum/inet6num block and stores every suitable
        network in the temporary _rirdata table.

        block is the list of lines of the WHOIS object, source_key names
        the source it was downloaded from and validcountries is a
        collection of known country codes. If validcountries is empty or
        None, country codes are not verified.

        The block is ignored if it is a placeholder for address space
        managed by somebody else, if its address range cannot be parsed,
        if it has no country or if any of its countries is unknown.
    """
    log.debug("Parsing inetnum block:")

    inetnum = {}
    for line in block:
        log.debug(line)

        # Split line
        key, val = split_line(line)

        # Filter any inetnum records which are only referring to IP space
        # not managed by that specific RIR...
        if key == "netname":
            if re.match(r"(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK)", val.strip()):
                log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip())
                return

        if key == "inetnum":
            start_address, delim, end_address = val.partition("-")

            # Strip any excess space
            start_address, end_address = start_address.rstrip(), end_address.strip()

            # Convert the range into a list of networks. The range is
            # summarised inside of the try block, because this raises
            # ValueError if the range is reversed and TypeError if both
            # addresses are of a different IP version.
            try:
                start_address = ipaddress.ip_address(start_address)
                end_address = ipaddress.ip_address(end_address)

                inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address))
            except (TypeError, ValueError):
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "inet6num":
            try:
                inetnum[key] = [ipaddress.ip_network(val, strict=False)]
            except ValueError:
                log.warning("Could not parse line: %s" % line)
                return

        elif key == "country":
            val = val.upper()

            # Catch RIR data objects with more than one country code,
            # but keep the list distinct...
            countries = inetnum.setdefault(key, [])
            if val not in countries:
                countries.append(val)

    # Skip empty objects and objects without any country
    if "country" not in inetnum:
        return

    countries = inetnum["country"]
    networks = inetnum.get("inet6num") or inetnum.get("inetnum") or []

    # Collect unknown country codes (nothing is checked if we have not
    # been given any valid ones)
    if validcountries:
        invalidcountries = [country for country in countries if country not in validcountries]
    else:
        invalidcountries = []

    # Iterate through all networks enumerated from above, check them for plausibility and insert
    # them into the database, if _check_parsed_network() succeeded
    for single_network in networks:
        if not self._check_parsed_network(single_network):
            continue

        # Skip objects with unknown country codes. This is only checked for
        # suitable networks to avoid log spam, and as it affects the whole
        # object, we can stop right here.
        if invalidcountries:
            log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
                (invalidcountries, countries, networks))
            break

        # Everything is fine here, run INSERT statement...
        self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
            VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
            "%s" % single_network, countries[0], countries, source_key,
        )
688
def _parse_org_block(self, block, source_key):
    """
        Parses an organisation block and stores its handle (in upper
        case) and its name in the temporary _organizations table.

        Blocks that carry neither attribute are ignored.
    """
    org = {}

    for line in block:
        key, val = split_line(line)

        # We are only interested in the handle and the name
        if key not in ("organisation", "org-name"):
            continue

        org[key] = val.upper() if key == "organisation" else val

    # Nothing to store
    if not org:
        return

    self.db.execute("INSERT INTO _organizations(handle, name, source) \
        VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
        UPDATE SET name = excluded.name",
        org.get("organisation"), org.get("org-name"), source_key,
    )
709
710 def _parse_line(self, line, source_key, validcountries = None):
711 # Skip version line
712 if line.startswith("2"):
713 return
714
715 # Skip comments
716 if line.startswith("#"):
717 return
718
719 try:
720 registry, country_code, type, line = line.split("|", 3)
721 except:
722 log.warning("Could not parse line: %s" % line)
723 return
724
725 # Skip any lines that are for stats only or do not have a country
726 # code at all (avoids log spam below)
727 if not country_code or country_code == '*':
728 return
729
730 # Skip objects with unknown country codes
731 if validcountries and country_code not in validcountries:
732 log.warning("Skipping line with bogus country '%s': %s" % \
733 (country_code, line))
734 return
735
736 if type in ("ipv6", "ipv4"):
737 return self._parse_ip_line(country_code, type, line, source_key)
738
739 def _parse_ip_line(self, country, type, line, source_key):
740 try:
741 address, prefix, date, status, organization = line.split("|")
742 except ValueError:
743 organization = None
744
745 # Try parsing the line without organization
746 try:
747 address, prefix, date, status = line.split("|")
748 except ValueError:
749 log.warning("Unhandled line format: %s" % line)
750 return
751
752 # Skip anything that isn't properly assigned
753 if not status in ("assigned", "allocated"):
754 return
755
756 # Cast prefix into an integer
757 try:
758 prefix = int(prefix)
759 except:
760 log.warning("Invalid prefix: %s" % prefix)
761 return
762
763 # Fix prefix length for IPv4
764 if type == "ipv4":
765 prefix = 32 - int(math.log(prefix, 2))
766
767 # Try to parse the address
768 try:
769 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
770 except ValueError:
771 log.warning("Invalid IP address: %s" % address)
772 return
773
774 if not self._check_parsed_network(network):
775 return
776
777 self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
778 VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
779 UPDATE SET country = excluded.country",
780 "%s" % network, country, [country], source_key,
781 )
782
def _import_as_names_from_arin(self):
    """
        Downloads the list of AS names published by ARIN and inserts
        them into the autnums table with "ARIN" as their source.

        Names of Autonomous Systems that are already in the table are
        left untouched. Lines that cannot be parsed, ASNs outside of the
        accepted ranges, placeholders for other registries and names
        with unexpected characters are skipped.
    """
    downloader = location.importer.Downloader()

    # Names that appear to be a placeholder for a different RIR or entity
    placeholder = re.compile(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(\d?$|\-.*)")

    # Anything we do not expect in an AS name
    unexpected = re.compile(r"[^a-zA-Z0-9-_]")

    # XXX: Download AS names file from ARIN (note that these names appear to be quite
    # technical, not intended for human consumption, as description fields in
    # organisation handles for other RIRs are - however, this is what we have got,
    # and in some cases, it might be still better than nothing)
    with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f:
        for line in f:
            # Convert binary line to string. str() on a bytes object returns
            # its representation ("b'...'"), so that needs to be decoded.
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="replace")
            else:
                line = str(line)

            # ... valid lines start with a space, followed by the number of the Autonomous System ...
            if not line.startswith(" "):
                continue

            # Split line and check if there is a valid ASN in it...
            fields = line.split()
            if len(fields) < 2:
                log.debug("Skipping ARIN AS names line without an ASN and a name")
                continue

            asn, name = fields[:2]

            try:
                asn = int(asn)
            except ValueError:
                log.debug("Skipping ARIN AS names line not containing an integer for ASN")
                continue

            if not (1 <= asn <= 23455 or 23457 <= asn <= 64495 or 131072 <= asn <= 4199999999):
                log.debug("Skipping ARIN AS names line not containing a valid ASN: %s" % asn)
                continue

            # Skip any AS name that appears to be a placeholder for a different RIR or entity...
            if placeholder.match(name):
                continue

            # Bail out in case the AS name contains anything we do not expect here...
            if unexpected.search(name):
                log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \
                    (asn, name))
                continue

            # Things look good here, run INSERT statement and skip this one if we already have
            # a (better?) name for this Autonomous System...
            self.db.execute("""
                INSERT INTO autnums(
                    number,
                    name,
                    source
                ) VALUES (%s, %s, %s)
                ON CONFLICT (number) DO NOTHING""",
                asn,
                name,
                "ARIN",
            )
834
def handle_update_announcements(self, ns):
    """
        Imports the BGP routing table from the given route server and
        removes everything from the announcements table that we never
        want to have in there.

        If the server starts with a slash, it is treated as the path of
        the control socket of Bird. Otherwise we connect to it using
        telnet. Import and clean-up happen in one transaction.
    """
    server = ns.server[0]

    cleanup = """
        -- Delete default routes
        DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

        -- Delete anything that is not global unicast address space
        DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

        -- DELETE "current network" address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8';

        -- DELETE local loopback address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8';

        -- DELETE RFC 1918 address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

        -- DELETE test, benchmark and documentation address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24';

        -- DELETE CGNAT address space (RFC 6598)
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10';

        -- DELETE link local address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16';

        -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068)
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24';
        DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16';

        -- DELETE multicast and reserved address space
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4';
        DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4';

        -- Delete networks that are too small to be in the global routing table
        DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
        DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

        -- Delete any non-public or reserved ASNs
        DELETE FROM announcements WHERE NOT (
            (autnum >= 1 AND autnum <= 23455)
            OR
            (autnum >= 23457 AND autnum <= 64495)
            OR
            (autnum >= 131072 AND autnum <= 4199999999)
        );

        -- Delete everything that we have not seen for 14 days
        DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
    """

    # A path means Bird, anything else is a host to connect to using telnet
    update = self._handle_update_announcements_from_bird \
        if server.startswith("/") else self._handle_update_announcements_from_telnet

    with self.db.transaction():
        update(server)

        # Purge anything we never want here
        self.db.execute(cleanup)
900
def _handle_update_announcements_from_bird(self, server):
    """
        Reads the routing table from Bird and stores all routes in the
        announcements table.

        server is the path of the control socket of Bird. Every line of
        the response to "show route" that looks like a route updates the
        announcement of its network (ASN and last_seen_at). Any other
        lines are logged and skipped.
    """
    # Pre-compile the regular expression for faster searching (this is a
    # raw literal, because "\s" is an invalid escape sequence otherwise)
    route = re.compile(rb"^\s(.+?)\s+.+?\[AS(.*?).\]$")

    log.info("Requesting routing table from Bird (%s)" % server)

    # Send command to list all routes
    for line in self._bird_cmd(server, "show route"):
        m = route.match(line)
        if not m:
            # Never fail at whatever Bird has sent just for logging it
            log.debug("Could not parse line: %s" % line.decode(errors="replace"))
            continue

        # Fetch the extracted network and ASN
        network, autnum = m.groups()

        # Insert it into the database
        self.db.execute("INSERT INTO announcements(network, autnum) \
            VALUES(%s, %s) ON CONFLICT (network) DO \
            UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
            network.decode(), autnum.decode(),
        )
923
def _handle_update_announcements_from_telnet(self, server):
    """
        Reads the IPv6 and IPv4 BGP tables from a route server using
        telnet and stores all routes in the announcements table.

        server is the host to connect to. Returns 1 if the server did
        not show a prompt within 30 seconds and None otherwise.
    """
    # Pre-compile regular expression for routes (this is a raw literal,
    # because "\s", "\d", etc. are invalid escape sequences otherwise)
    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

    with telnetlib.Telnet(server) as t:
        # Wait for console greeting
        greeting = t.read_until(b"> ", timeout=30)
        if not greeting:
            log.error("Could not get a console prompt")
            return 1

        # Disable pagination
        t.write(b"terminal length 0\n")

        # Wait for the prompt to return
        t.read_until(b"> ")

        # Fetch the routing tables
        for protocol in ("ipv6", "ipv4"):
            log.info("Requesting %s routing table" % protocol)

            # Request the full unicast routing table
            t.write(b"show bgp %s unicast\n" % protocol.encode())

            # Read entire header which ends with "Path"
            t.read_until(b"Path\r\n")

            while True:
                # Try reading a full entry
                # Those might be broken across multiple lines but ends with i
                line = t.read_until(b"i\r\n", timeout=5)

                # Nothing has arrived within the timeout, so we are done
                if not line:
                    break

                # Try finding a route in here
                m = route.match(line)
                if not m:
                    continue

                network, autnum = m.groups()

                # Convert network to string
                network = network.decode()

                # Append /24 for IPv4 addresses
                # NOTE(review): route servers commonly omit the mask for
                # classful networks, which would be /8 or /16 for class A
                # and B - confirm that /24 is right for all of them
                if "/" not in network and ":" not in network:
                    network = "%s/24" % network

                # Convert AS number to integer
                autnum = int(autnum)

                log.info("Found announcement for %s by %s" % (network, autnum))

                self.db.execute("INSERT INTO announcements(network, autnum) \
                    VALUES(%s, %s) ON CONFLICT (network) DO \
                    UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                    network, autnum,
                )

            log.info("Finished reading the %s routing table" % protocol)
989
990 def _bird_cmd(self, socket_path, command):
991 # Connect to the socket
992 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
993 s.connect(socket_path)
994
995 # Allocate some buffer
996 buffer = b""
997
998 # Send the command
999 s.send(b"%s\n" % command.encode())
1000
1001 while True:
1002 # Fill up the buffer
1003 buffer += s.recv(4096)
1004
1005 while True:
1006 # Search for the next newline
1007 pos = buffer.find(b"\n")
1008
1009 # If we cannot find one, we go back and read more data
1010 if pos <= 0:
1011 break
1012
1013 # Cut after the newline character
1014 pos += 1
1015
1016 # Split the line we want and keep the rest in buffer
1017 line, buffer = buffer[:pos], buffer[pos:]
1018
1019 # Look for the end-of-output indicator
1020 if line == b"0000 \n":
1021 return
1022
1023 # Otherwise return the line
1024 yield line
1025
def handle_update_overrides(self, ns):
    """
    Replace all overrides in the database with the content of ``ns.files``.

    Inside one database transaction, ``autnum_overrides`` and
    ``network_overrides`` are truncated and then re-populated from the
    blocks that ``location.importer.read_blocks()`` returns for each file:

    * "net" blocks become rows in ``network_overrides``. Networks that
      cannot be parsed and networks with a prefix length of zero are
      skipped with a warning.
    * "aut-num" blocks become rows in ``autnum_overrides``. Values that
      are not "AS" followed by a number are skipped with a warning.
    * Any other block type is reported with a warning.

    All rows are stored with the source "manual". Rows that conflict with
    an already imported one are silently dropped (ON CONFLICT DO NOTHING).
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("""
            TRUNCATE TABLE autnum_overrides;
            TRUNCATE TABLE network_overrides;
        """)

        for path in ns.files:
            log.info("Reading %s..." % path)

            with open(path, "rb") as f:
                for block_type, block in location.importer.read_blocks(f):
                    if block_type == "net":
                        network = block.get("net")

                        # Try to parse and normalise the network
                        try:
                            network = ipaddress.ip_network(network, strict=False)
                        except ValueError as e:
                            log.warning("Invalid IP network: %s: %s" % (network, e))
                            continue

                        # Prevent that we overwrite all networks
                        if network.prefixlen == 0:
                            log.warning("Skipping %s: You cannot overwrite default" % network)
                            continue

                        self.db.execute("""
                            INSERT INTO network_overrides(
                                network,
                                country,
                                source,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast,
                                is_drop
                            ) VALUES (%s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT (network) DO NOTHING""",
                            "%s" % network,
                            block.get("country"),
                            "manual",
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                            self._parse_bool(block, "drop"),
                        )

                    elif block_type == "aut-num":
                        autnum = block.get("aut-num")

                        # Check if AS number is set and begins with "AS"
                        if not autnum or not autnum.startswith("AS"):
                            log.warning("Invalid AS number: %s" % autnum)
                            continue

                        # Strip "AS" and make sure that the rest is a number.
                        # Otherwise the database would reject the value and
                        # the whole import would fail.
                        try:
                            number = int(autnum[2:])
                        except ValueError:
                            log.warning("Invalid AS number: %s" % autnum)
                            continue

                        self.db.execute("""
                            INSERT INTO autnum_overrides(
                                number,
                                name,
                                country,
                                source,
                                is_anonymous_proxy,
                                is_satellite_provider,
                                is_anycast,
                                is_drop
                            ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT DO NOTHING""",
                            number,
                            block.get("name"),
                            block.get("country"),
                            "manual",
                            self._parse_bool(block, "is-anonymous-proxy"),
                            self._parse_bool(block, "is-satellite-provider"),
                            self._parse_bool(block, "is-anycast"),
                            self._parse_bool(block, "drop"),
                        )

                    else:
                        log.warning("Unsupported type: %s" % block_type)
1108
1109 @staticmethod
1110 def _parse_bool(block, key):
1111 val = block.get(key)
1112
1113 # There is no point to proceed when we got None
1114 if val is None:
1115 return
1116
1117 # Convert to lowercase
1118 val = val.lower()
1119
1120 # True
1121 if val in ("yes", "1"):
1122 return True
1123
1124 # False
1125 if val in ("no", "0"):
1126 return False
1127
1128 # Default to None
1129 return None
1130
def handle_import_countries(self, ns):
    """
    Replace the content of the countries table with the lines from ``ns.file``.

    ``ns.file`` is iterated as a collection of files which are read line
    by line. Each line is expected to consist of a country code, a
    continent code and a name, separated by whitespace; the name may
    contain whitespace itself. Blank lines and lines starting with "#"
    are ignored. Lines with fewer than three fields are skipped with a
    warning.

    Everything happens inside one database transaction.
    """
    with self.db.transaction():
        # Drop all data that we have
        self.db.execute("TRUNCATE TABLE countries")

        for file in ns.file:
            for line in file:
                line = line.rstrip()

                # Ignore blank lines and any comments
                if not line or line.startswith("#"):
                    continue

                # Unpacking raises ValueError if there are fewer than three
                # fields. Nothing else is caught here, so that e.g. Ctrl-C
                # still terminates the import.
                try:
                    country_code, continent_code, name = line.split(maxsplit=2)
                except ValueError:
                    log.warning("Could not parse line: %s" % line)
                    continue

                self.db.execute("""
                    INSERT INTO countries(country_code, name, continent_code)
                    VALUES(%s, %s, %s) ON CONFLICT DO NOTHING""",
                    country_code, name, continent_code,
                )
1152
1153
def split_line(line):
    """
    Split ``line`` at its first colon into a key and a value.

    Surrounding whitespace is removed from both parts. If the line does
    not contain a colon, the value is an empty string.
    """
    head, _sep, tail = line.partition(":")

    # Strip any excess space
    return head.strip(), tail.strip()
1162
def main():
    """Create the command line interface and run it."""
    CLI().run()

main()