]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location-importer.in
python: Use overridden AS name when possible
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26 import telnetlib
27
28 # Load our location module
29 import location
30 import location.database
31 import location.importer
32 from location.i18n import _
33
# Module-wide logger of the importer; propagation to the parent
# loggers is explicitly switched on
log = logging.getLogger("location.importer")
log.propagate = 1
37
class CLI(object):
    """
    Command line interface of the location importer.

    Collects WHOIS data, BGP announcements and manually maintained
    overrides in a SQL database and compiles a libloc database file
    out of the collected data.
    """

    def parse_cli(self):
        """
        Parses the command line.

        Returns the argparse namespace. Its "func" attribute is the handler
        of the selected sub-command. If no sub-command was given, the usage
        is printed and the process exits with code 2.
        """
        parser = argparse.ArgumentParser(
            description=_("Location Importer Command Line Interface"),
        )
        subparsers = parser.add_subparsers()

        # Global configuration flags
        parser.add_argument("--debug", action="store_true",
            help=_("Enable debug output"))

        # version
        parser.add_argument("--version", action="version",
            version="%(prog)s @VERSION@")

        # Database
        parser.add_argument("--database-host", required=True,
            help=_("Database Hostname"), metavar=_("HOST"))
        parser.add_argument("--database-name", required=True,
            help=_("Database Name"), metavar=_("NAME"))
        parser.add_argument("--database-username", required=True,
            help=_("Database Username"), metavar=_("USERNAME"))
        parser.add_argument("--database-password", required=True,
            help=_("Database Password"), metavar=_("PASSWORD"))

        # Write Database
        write = subparsers.add_parser("write", help=_("Write database to file"))
        write.set_defaults(func=self.handle_write)
        write.add_argument("file", nargs=1, help=_("Database File"))
        write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
        write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
        write.add_argument("--description", nargs="?", help=_("Sets a description"))
        write.add_argument("--license", nargs="?", help=_("Sets the license"))

        # Update WHOIS
        update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
        update_whois.set_defaults(func=self.handle_update_whois)

        # Update announcements
        update_announcements = subparsers.add_parser("update-announcements",
            help=_("Update BGP Annoucements"))
        update_announcements.set_defaults(func=self.handle_update_announcements)
        update_announcements.add_argument("server", nargs=1,
            help=_("Route Server to connect to"), metavar=_("SERVER"))

        # Update overrides
        update_overrides = subparsers.add_parser("update-overrides",
            help=_("Update overrides"),
        )
        update_overrides.add_argument(
            "files", nargs="+", help=_("Files to import"),
        )
        update_overrides.set_defaults(func=self.handle_update_overrides)

        args = parser.parse_args()

        # Enable debug logging
        if args.debug:
            log.setLevel(logging.DEBUG)

        # Print usage if no action was given
        if "func" not in args:
            parser.print_usage()
            sys.exit(2)

        return args

    def run(self):
        """
        Entry point: parses the command line, connects to the database
        and calls the handler of the selected sub-command.

        Always terminates the process. The exit code is the return value
        of the handler if that is truthy and 0 otherwise.
        """
        # Parse command line arguments
        args = self.parse_cli()

        # Initialise database
        self.db = self._setup_database(args)

        # Call function
        ret = args.func(args)

        # Return with exit code
        if ret:
            sys.exit(ret)

        # Otherwise just exit
        sys.exit(0)

    def _setup_database(self, ns):
        """
        Initialise the database

        Opens a connection using the --database-* arguments in ns, creates
        all tables and indexes that do not exist yet and returns the
        connection.
        """
        # Connect to database
        db = location.database.Connection(
            host=ns.database_host, database=ns.database_name,
            user=ns.database_username, password=ns.database_password,
        )

        with db.transaction():
            db.execute("""
                -- announcements
                CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
                    first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
                    last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
                CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
                CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));

                -- autnums
                CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL);
                CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

                -- networks
                CREATE TABLE IF NOT EXISTS networks(network inet, country text);
                CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
                CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

                -- overrides
                CREATE TABLE IF NOT EXISTS autnum_overrides(
                    number bigint NOT NULL,
                    name text,
                    is_anonymous_proxy boolean DEFAULT FALSE,
                    is_satellite_provider boolean DEFAULT FALSE,
                    is_anycast boolean DEFAULT FALSE
                );
                CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
                    ON autnum_overrides(number);

                CREATE TABLE IF NOT EXISTS network_overrides(
                    network inet NOT NULL,
                    country text,
                    is_anonymous_proxy boolean DEFAULT FALSE,
                    is_satellite_provider boolean DEFAULT FALSE,
                    is_anycast boolean DEFAULT FALSE
                );
                CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
                    ON network_overrides(network);
            """)

        return db

    def handle_write(self, ns):
        """
        Compiles a database in libloc format out of what is in the database

        Writes all named Autonomous Systems (preferring an overridden name)
        and all announced networks to every file in ns.file.
        """
        # NOTE: Never print or log ns as a whole. It carries the database
        # password that was passed on the command line.

        # Allocate a writer
        writer = location.Writer(ns.signing_key)

        # Set all metadata
        if ns.vendor:
            writer.vendor = ns.vendor

        if ns.description:
            writer.description = ns.description

        if ns.license:
            writer.license = ns.license

        # Add all Autonomous Systems
        log.info("Writing Autonomous Systems...")

        # Select all ASes with a name
        rows = self.db.query("""
            SELECT
                autnums.number AS number,
                COALESCE(
                    (SELECT overrides.name FROM autnum_overrides overrides
                        WHERE overrides.number = autnums.number),
                    autnums.name
                ) AS name
                FROM autnums
                WHERE name <> %s ORDER BY number
            """, "")

        for row in rows:
            a = writer.add_as(row.number)
            a.name = row.name

        # Add all networks
        log.info("Writing networks...")

        # Select all known networks
        # If several networks contain an announcement, the most specific one
        # is used. A scalar sub-select that returns more than one row would
        # abort the whole query.
        rows = self.db.query("""
            SELECT
                announcements.network AS network,
                announcements.autnum AS autnum,
                (
                    SELECT networks.country FROM networks
                        WHERE announcements.network <<= networks.network
                        ORDER BY masklen(networks.network) DESC
                        LIMIT 1
                ) AS country,

                -- Flags
                FALSE AS is_anonymous_proxy,
                FALSE AS is_satellite_provider,
                FALSE AS is_anycast
            FROM announcements
        """)

        for row in rows:
            network = writer.add_network(row.network)

            # Save AS & country
            network.asn, network.country_code = row.autnum, row.country

            # Set flags
            if row.is_anonymous_proxy:
                network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY)

            if row.is_satellite_provider:
                network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)

            if row.is_anycast:
                network.set_flag(location.NETWORK_FLAG_ANYCAST)

        # Write everything to file
        log.info("Writing database to file...")
        for path in ns.file:
            writer.write(path)

    def handle_update_whois(self, ns):
        """
        Downloads all WHOIS sources and all extended sources and imports
        the Autonomous Systems and networks found in them.
        """
        downloader = location.importer.Downloader()

        # Download all sources
        with self.db.transaction():
            # Create some temporary tables to store parsed data
            # (number is a bigint just like autnums.number)
            self.db.execute("""
                CREATE TEMPORARY TABLE _autnums(number bigint, organization text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

                CREATE TEMPORARY TABLE _organizations(handle text, name text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
            """)

            for source in location.importer.WHOIS_SOURCES:
                with downloader.request(source, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block)

            # autnums.name is NOT NULL, but the LEFT JOIN yields NULL for
            # every AS without a (known) organisation. Store an empty name
            # for those; ASes with an empty name are skipped when writing.
            self.db.execute("""
                INSERT INTO autnums(number, name)
                    SELECT _autnums.number, COALESCE(_organizations.name, '') FROM _autnums
                        LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
                ON CONFLICT (number) DO UPDATE SET name = excluded.name;
            """)

        # Download all extended sources
        for source in location.importer.EXTENDED_SOURCES:
            with self.db.transaction():
                # Download data
                with downloader.request(source) as f:
                    for line in f:
                        self._parse_line(line)

    def _parse_block(self, block):
        """
        Dispatches a WHOIS block (a sequence of lines) to the parser that
        matches the type given on its first line.

        Empty blocks and blocks of any other type are ignored.
        """
        # Nothing to do for empty blocks
        if not block:
            return

        # Get first line to find out what type of block this is
        line = block[0]

        # aut-num
        if line.startswith("aut-num:"):
            return self._parse_autnum_block(block)

        # organisation
        elif line.startswith("organisation:"):
            return self._parse_org_block(block)

    def _parse_autnum_block(self, block):
        """
        Extracts the AS number and the organisation handle from an aut-num
        block and stores them in the temporary _autnums table.
        """
        autnum = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key == "aut-num":
                m = re.match(r"^(AS|as)(\d+)", val)
                if m:
                    autnum["asn"] = m.group(2)

            elif key == "org":
                autnum[key] = val

        # Skip objects without a usable AS number
        # (they would otherwise be stored with a NULL number)
        if "asn" not in autnum:
            return

        # Insert into database
        self.db.execute("""
            INSERT INTO _autnums(number, organization)
                VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET
                organization = excluded.organization""",
            autnum.get("asn"), autnum.get("org"),
        )

    def _parse_org_block(self, block):
        """
        Extracts the handle and the name from an organisation block and
        stores them in the temporary _organizations table.
        """
        org = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key in ("organisation", "org-name"):
                org[key] = val

        # Skip empty objects
        if not org:
            return

        self.db.execute("""
            INSERT INTO _organizations(handle, name)
                VALUES(%s, %s) ON CONFLICT (handle) DO
                UPDATE SET name = excluded.name""",
            org.get("organisation"), org.get("org-name"),
        )

    def _parse_line(self, line):
        """
        Parses one "|"-separated line of an extended source.

        Version lines, comments, statistics lines and all records that are
        neither "ipv4" nor "ipv6" are ignored.
        """
        # Skip version line
        if line.startswith("2"):
            return

        # Skip comments
        if line.startswith("#"):
            return

        try:
            registry, country_code, record_type, remainder = line.split("|", 3)
        except ValueError:
            log.warning("Could not parse line: %s" % line)
            return

        # Skip any lines that are for stats only
        if country_code == "*":
            return

        if record_type in ("ipv6", "ipv4"):
            return self._parse_ip_line(country_code, record_type, remainder)

    def _parse_ip_line(self, country, type, line):
        """
        Parses the remainder of an IP record and stores the network
        together with its country in the networks table.

        country is the country code of the record, type is either "ipv4"
        or "ipv6" and line holds the fields
        address|prefix|date|status[|organization].

        Records that cannot be parsed are logged and skipped. Records that
        are neither "assigned" nor "allocated" are skipped silently.
        """
        # The organization field at the end is optional
        fields = line.split("|")
        if len(fields) not in (4, 5):
            log.warning("Unhandled line format: %s" % line)
            return

        address, prefix, date, status = fields[:4]

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Cast prefix into an integer
        try:
            prefix = int(prefix)
        except ValueError:
            log.warning("Invalid prefix: %s" % prefix)
            return

        # Fix prefix length for IPv4
        if type == "ipv4":
            # The value is treated as a number of addresses here
            # and must be positive to be converted
            if prefix <= 0:
                log.warning("Invalid prefix: %s" % prefix)
                return

            # bit_length() - 1 is floor(log2(n)) without any float rounding
            prefix = 32 - (prefix.bit_length() - 1)

        # Try to parse the address
        try:
            network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
        except ValueError:
            log.warning("Invalid IP address: %s" % address)
            return

        self.db.execute("""
            INSERT INTO networks(network, country)
                VALUES(%s, %s) ON CONFLICT (network) DO
                UPDATE SET country = excluded.country""",
            "%s" % network, country,
        )

    def handle_update_announcements(self, ns):
        """
        Fetches the IPv6 and IPv4 routing tables from the route server
        ns.server[0] over telnet, stores every announcement that was found
        and purges announcements which should not be in the database.
        """
        server = ns.server[0]

        # Pre-compile regular expression for routes
        # (a raw bytes literal so that the backslashes reach the regex engine)
        route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

        with telnetlib.Telnet(server) as t:
            # Wait for console greeting
            greeting = t.read_until(b"> ")
            log.debug(greeting.decode())

            # Disable pagination
            t.write(b"terminal length 0\n")

            # Wait for the prompt to return
            t.read_until(b"> ")

            # Fetch the routing tables
            with self.db.transaction():
                for protocol in ("ipv6", "ipv4"):
                    log.info("Requesting %s routing table" % protocol)

                    # Request the full unicast routing table
                    t.write(b"show bgp %s unicast\n" % protocol.encode())

                    # Read entire header which ends with "Path"
                    t.read_until(b"Path\r\n")

                    while True:
                        # Try reading a full entry
                        # Those might be broken across multiple lines but ends with i
                        line = t.read_until(b"i\r\n", timeout=5)
                        if not line:
                            break

                        # Try finding a route in here
                        m = route.match(line)
                        if m:
                            network, autnum = m.groups()

                            # Convert network to string
                            network = network.decode()

                            # Convert AS number to integer
                            autnum = int(autnum)

                            log.info("Found announcement for %s by %s" % (network, autnum))

                            self.db.execute("""
                                INSERT INTO announcements(network, autnum)
                                    VALUES(%s, %s) ON CONFLICT (network) DO
                                    UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP""",
                                network, autnum,
                            )

                    log.info("Finished reading the %s routing table" % protocol)

                # Purge anything we never want here
                self.db.execute("""
                    -- Delete default routes
                    DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

                    -- Delete anything that is not global unicast address space
                    DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

                    -- DELETE RFC1918 address space
                    DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
                    DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
                    DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

                    -- Delete networks that are too small to be in the global routing table
                    DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
                    DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

                    -- Delete any non-public or reserved ASNs
                    DELETE FROM announcements WHERE NOT (
                        (autnum >= 1 AND autnum <= 23455)
                        OR
                        (autnum >= 23457 AND autnum <= 64495)
                        OR
                        (autnum >= 131072 AND autnum <= 4199999999)
                    );

                    -- Delete everything that we have not seen for 14 days
                    DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
                """)

    def handle_update_overrides(self, ns):
        """
        Replaces all overrides in the database by the content of the
        files in ns.files.

        Blocks of type "net" become network overrides, blocks of type
        "autnum" become AS overrides. Invalid blocks and blocks of any
        other type are logged and skipped.
        """
        with self.db.transaction():
            # Drop all data that we have
            self.db.execute("""
                TRUNCATE TABLE autnum_overrides;
                TRUNCATE TABLE network_overrides;
            """)

            for path in ns.files:
                log.info("Reading %s..." % path)

                with open(path, "rb") as f:
                    for block_type, block in location.importer.read_blocks(f):
                        if block_type == "net":
                            network = block.get("net")

                            # Try to parse and normalise the network
                            try:
                                network = ipaddress.ip_network(network, strict=False)
                            except ValueError as e:
                                log.warning("Invalid IP network: %s: %s" % (network, e))
                                continue

                            # One placeholder for each of the five columns
                            self.db.execute("""
                                INSERT INTO network_overrides(
                                    network,
                                    country,
                                    is_anonymous_proxy,
                                    is_satellite_provider,
                                    is_anycast
                                ) VALUES (%s, %s, %s, %s, %s)
                                ON CONFLICT (network) DO NOTHING""",
                                "%s" % network,
                                block.get("country"),
                                block.get("is-anonymous-proxy") == "yes",
                                block.get("is-satellite-provider") == "yes",
                                block.get("is-anycast") == "yes",
                            )

                        elif block_type == "autnum":
                            autnum = block.get("autnum")

                            # Check if AS number begins with "AS"
                            if not autnum.startswith("AS"):
                                log.warning("Invalid AS number: %s" % autnum)
                                continue

                            # Strip "AS"
                            autnum = autnum[2:]

                            self.db.execute("""
                                INSERT INTO autnum_overrides(
                                    number,
                                    name,
                                    is_anonymous_proxy,
                                    is_satellite_provider,
                                    is_anycast
                                ) VALUES(%s, %s, %s, %s, %s)
                                ON CONFLICT DO NOTHING""",
                                autnum, block.get("name"),
                                block.get("is-anonymous-proxy") == "yes",
                                block.get("is-satellite-provider") == "yes",
                                block.get("is-anycast") == "yes",
                            )

                        else:
                            log.warning("Unsupport type: %s" % block_type)
568
569
def split_line(line):
    """
    Splits a "key: value" line at the first colon.

    Returns the tuple (key, value) with surrounding whitespace removed
    from both parts. The value is an empty string if the line does not
    contain a colon.
    """
    head, _separator, tail = line.partition(":")

    # Strip any excess space
    return head.strip(), tail.strip()
578
def main():
    """
    Creates the command line interface and runs it.
    """
    CLI().run()

# Start the importer as soon as this script is executed
main()