git.ipfire.org Git - location/libloc.git/blob - src/python/location-importer.in
python: Implement importing override files into the database
[location/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26 import telnetlib
27
28 # Load our location module
29 import location
30 import location.database
31 import location.importer
32 from location.i18n import _
33
# Set up the logger used throughout this script and let its records
# propagate to the handlers of its parent loggers
log = logging.getLogger("location.importer")
log.propagate = True
37
class CLI(object):
    """
    Command line interface of the location importer.

    Parses the command line, connects to the database and dispatches to the
    handler of the selected subcommand (update-whois, update-announcements
    or update-overrides).
    """

    def parse_cli(self):
        """
        Build the argument parser and parse the command line.

        Returns the parsed namespace whose "func" attribute is the handler
        of the selected subcommand. Prints the usage and exits with code 2
        if no subcommand was given.
        """
        parser = argparse.ArgumentParser(
            description=_("Location Importer Command Line Interface"),
        )
        subparsers = parser.add_subparsers()

        # Global configuration flags
        parser.add_argument("--debug", action="store_true",
            help=_("Enable debug output"))

        # version
        parser.add_argument("--version", action="version",
            version="%(prog)s @VERSION@")

        # Database
        parser.add_argument("--database-host", required=True,
            help=_("Database Hostname"), metavar=_("HOST"))
        parser.add_argument("--database-name", required=True,
            help=_("Database Name"), metavar=_("NAME"))
        parser.add_argument("--database-username", required=True,
            help=_("Database Username"), metavar=_("USERNAME"))
        parser.add_argument("--database-password", required=True,
            help=_("Database Password"), metavar=_("PASSWORD"))

        # Update WHOIS
        update_whois = subparsers.add_parser("update-whois",
            help=_("Update WHOIS Information"))
        update_whois.set_defaults(func=self.handle_update_whois)

        # Update announcements
        update_announcements = subparsers.add_parser("update-announcements",
            help=_("Update BGP Annoucements"))
        update_announcements.set_defaults(func=self.handle_update_announcements)
        update_announcements.add_argument("server", nargs=1,
            help=_("Route Server to connect to"), metavar=_("SERVER"))

        # Update overrides
        update_overrides = subparsers.add_parser("update-overrides",
            help=_("Update overrides"),
        )
        update_overrides.add_argument(
            "files", nargs="+", help=_("Files to import"),
        )
        update_overrides.set_defaults(func=self.handle_update_overrides)

        args = parser.parse_args()

        # Enable debug logging
        if args.debug:
            log.setLevel(logging.DEBUG)

        # Print usage if no action was given
        if "func" not in args:
            parser.print_usage()
            sys.exit(2)

        return args

    def run(self):
        """
        Run the command line interface.

        Parses the arguments, sets up the database, calls the selected
        handler and exits the process with the handler's return value
        (or 0 if the handler returned nothing truthy).
        """
        # Parse command line arguments
        args = self.parse_cli()

        # Initialise database
        self.db = self._setup_database(args)

        # Call function and exit with its return code
        ret = args.func(args)
        sys.exit(ret or 0)

    def _setup_database(self, ns):
        """
        Connect to the database and create all tables and indexes that do
        not exist yet.

        ns is the parsed command line namespace carrying the database
        credentials. Returns the database connection.
        """
        # Connect to database
        db = location.database.Connection(
            host=ns.database_host, database=ns.database_name,
            user=ns.database_username, password=ns.database_password,
        )

        with db.transaction():
            db.execute("""
                -- announcements
                CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint,
                    first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
                    last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP);
                CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network);
                CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network));

                -- autnums
                CREATE TABLE IF NOT EXISTS autnums(number bigint, name text);
                CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

                -- networks
                CREATE TABLE IF NOT EXISTS networks(network inet, country text);
                CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
                CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops);

                -- overrides
                CREATE TABLE IF NOT EXISTS autnum_overrides(
                    number bigint NOT NULL,
                    name text,
                    is_anonymous_proxy boolean DEFAULT FALSE,
                    is_satellite_provider boolean DEFAULT FALSE,
                    is_anycast boolean DEFAULT FALSE
                );
                CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number
                    ON autnum_overrides(number);

                CREATE TABLE IF NOT EXISTS network_overrides(
                    network inet NOT NULL,
                    country text,
                    is_anonymous_proxy boolean DEFAULT FALSE,
                    is_satellite_provider boolean DEFAULT FALSE,
                    is_anycast boolean DEFAULT FALSE
                );
                CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network
                    ON network_overrides(network);
            """)

        return db

    def handle_update_whois(self, ns):
        """
        Handler of the "update-whois" subcommand.

        Downloads every source in location.importer.WHOIS_SOURCES, collects
        aut-num and organisation objects in temporary tables and merges them
        into the autnums table. Afterwards every source in
        location.importer.EXTENDED_SOURCES is downloaded and parsed line by
        line into the networks table.
        """
        downloader = location.importer.Downloader()

        # Download all sources
        with self.db.transaction():
            # Create some temporary tables to store parsed data.
            # The number column is a bigint like autnums.number, because
            # 32 bit AS numbers do not fit into a signed 32 bit integer.
            self.db.execute("""
                CREATE TEMPORARY TABLE _autnums(number bigint, organization text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

                CREATE TEMPORARY TABLE _organizations(handle text, name text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
            """)

            for source in location.importer.WHOIS_SOURCES:
                with downloader.request(source, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block)

            self.db.execute("""
                INSERT INTO autnums(number, name)
                    SELECT _autnums.number, _organizations.name FROM _autnums
                        LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
                ON CONFLICT (number) DO UPDATE SET name = excluded.name;
            """)

        # Download all extended sources
        for source in location.importer.EXTENDED_SOURCES:
            with self.db.transaction():
                # Download data
                with downloader.request(source) as f:
                    for line in f:
                        self._parse_line(line)

    def _parse_block(self, block):
        """
        Dispatch a WHOIS block (a sequence of lines) to the matching parser.

        Only blocks starting with "aut-num:" or "organisation:" are
        processed; everything else (including empty blocks) is ignored.
        """
        # Nothing to do for empty blocks
        if not block:
            return

        # Get first line to find out what type of block this is
        line = block[0]

        # aut-num
        if line.startswith("aut-num:"):
            return self._parse_autnum_block(block)

        # organisation
        elif line.startswith("organisation:"):
            return self._parse_org_block(block)

    def _parse_autnum_block(self, block):
        """
        Parse an aut-num block and store the AS number together with its
        organisation handle in the temporary _autnums table.

        Blocks without a parsable AS number are skipped.
        """
        autnum = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key == "aut-num":
                m = re.match(r"^(AS|as)(\d+)", val)
                if m:
                    autnum["asn"] = m.group(2)

            elif key == "org":
                autnum[key] = val

        # Skip objects without an AS number, as they would otherwise be
        # inserted with a NULL number
        if "asn" not in autnum:
            return

        # Insert into database
        self.db.execute("INSERT INTO _autnums(number, organization) \
            VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
            organization = excluded.organization",
            autnum.get("asn"), autnum.get("org"),
        )

    def _parse_org_block(self, block):
        """
        Parse an organisation block and store the handle and the name of
        the organisation in the temporary _organizations table.
        """
        org = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key in ("organisation", "org-name"):
                org[key] = val

        # Skip empty objects
        if not org:
            return

        self.db.execute("INSERT INTO _organizations(handle, name) \
            VALUES(%s, %s) ON CONFLICT (handle) DO \
            UPDATE SET name = excluded.name",
            org.get("organisation"), org.get("org-name"),
        )

    def _parse_line(self, line):
        """
        Parse one line of an extended source.

        Lines are expected to look like "registry|country|type|...".
        Version lines, comments, statistic lines (country "*") and all
        types other than ipv4/ipv6 are ignored.
        """
        # Skip version line
        if line.startswith("2"):
            return

        # Skip comments
        if line.startswith("#"):
            return

        try:
            registry, country_code, type, line = line.split("|", 3)
        except ValueError:
            log.warning("Could not parse line: %s" % line)
            return

        # Skip any lines that are for stats only
        if country_code == "*":
            return

        if type in ("ipv6", "ipv4"):
            return self._parse_ip_line(country_code, type, line)

    def _parse_ip_line(self, country, type, line):
        """
        Parse the remainder of an ipv4/ipv6 line and store the network
        with its country in the networks table.

        line is expected to be "address|prefix|date|status" with an
        optional fifth field. For ipv6 the second field is the prefix
        length; for ipv4 it is treated as a number of addresses and
        converted into a prefix length. Only networks with status
        "assigned" or "allocated" are stored.
        """
        fields = line.split("|")

        # The line comes with or without a trailing fifth field
        if len(fields) not in (4, 5):
            log.warning("Unhandled line format: %s" % line)
            return

        address, prefix, _date, status = fields[:4]

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Cast prefix into an integer
        try:
            prefix = int(prefix)
        except ValueError:
            log.warning("Invalid prefix: %s" % prefix)
            return

        # Fix prefix length for IPv4
        if type == "ipv4":
            if prefix <= 0:
                log.warning("Invalid prefix: %s" % prefix)
                return

            # bit_length() - 1 is the integer base-2 logarithm, computed
            # without floating point rounding.
            # NOTE(review): counts that are not a power of two are rounded
            # down, so only the first part of such a range is covered.
            prefix = 32 - (prefix.bit_length() - 1)

        # Try to parse the address
        try:
            network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
        except ValueError:
            log.warning("Invalid IP address: %s" % address)
            return

        self.db.execute("INSERT INTO networks(network, country) \
            VALUES(%s, %s) ON CONFLICT (network) DO \
            UPDATE SET country = excluded.country",
            "%s" % network, country,
        )

    def handle_update_announcements(self, ns):
        """
        Handler of the "update-announcements" subcommand.

        Connects to the route server ns.server[0] via telnet, requests the
        IPv6 and IPv4 unicast BGP tables, stores every announcement that
        could be parsed and finally purges unwanted and outdated entries
        from the announcements table.
        """
        server = ns.server[0]

        # Pre-compile regular expression for routes. An entry starts with
        # "*", followed by whitespace or ">" and "i", then the network.
        # The last number before the trailing " i" is taken as AS number.
        route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL)

        with telnetlib.Telnet(server) as t:
            # Wait for console greeting
            greeting = t.read_until(b"> ")
            log.debug(greeting.decode())

            # Disable pagination
            t.write(b"terminal length 0\n")

            # Wait for the prompt to return
            t.read_until(b"> ")

            # Fetch the routing tables
            with self.db.transaction():
                for protocol in ("ipv6", "ipv4"):
                    log.info("Requesting %s routing table" % protocol)

                    # Request the full unicast routing table
                    t.write(b"show bgp %s unicast\n" % protocol.encode())

                    # Read entire header which ends with "Path"
                    t.read_until(b"Path\r\n")

                    while True:
                        # Try reading a full entry
                        # Those might be broken across multiple lines but ends with i
                        line = t.read_until(b"i\r\n", timeout=5)
                        if not line:
                            break

                        # Try finding a route in here
                        m = route.match(line)
                        if m:
                            network, autnum = m.groups()

                            # Convert network to string
                            network = network.decode()

                            # Convert AS number to integer
                            autnum = int(autnum)

                            log.info("Found announcement for %s by %s" % (network, autnum))

                            self.db.execute("INSERT INTO announcements(network, autnum) \
                                VALUES(%s, %s) ON CONFLICT (network) DO \
                                UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP",
                                network, autnum,
                            )

                    log.info("Finished reading the %s routing table" % protocol)

                # Purge anything we never want here
                self.db.execute("""
                    -- Delete default routes
                    DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0';

                    -- Delete anything that is not global unicast address space
                    DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3';

                    -- DELETE RFC1918 address space
                    DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8';
                    DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12';
                    DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16';

                    -- Delete networks that are too small to be in the global routing table
                    DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48;
                    DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24;

                    -- Delete any non-public or reserved ASNs
                    DELETE FROM announcements WHERE NOT (
                        (autnum >= 1 AND autnum <= 23455)
                        OR
                        (autnum >= 23457 AND autnum <= 64495)
                        OR
                        (autnum >= 131072 AND autnum <= 4199999999)
                    );

                    -- Delete everything that we have not seen for 14 days
                    DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days';
                """)

    def handle_update_overrides(self, ns):
        """
        Handler of the "update-overrides" subcommand.

        Drops all existing overrides and re-imports them from the files in
        ns.files within one transaction. Blocks of type "net" and "autnum"
        are imported; any other type is logged and skipped.
        """
        with self.db.transaction():
            # Drop all data that we have
            self.db.execute("""
                TRUNCATE TABLE autnum_overrides;
                TRUNCATE TABLE network_overrides;
            """)

            for path in ns.files:
                log.info("Reading %s..." % path)

                with open(path, "rb") as f:
                    for kind, block in location.importer.read_blocks(f):
                        if kind == "net":
                            self._import_network_override(block)

                        elif kind == "autnum":
                            self._import_autnum_override(block)

                        else:
                            log.warning("Unsupported type: %s" % kind)

    def _import_network_override(self, block):
        """
        Insert one "net" override block into the network_overrides table.

        Blocks with a missing or invalid network are logged and skipped.
        """
        network = block.get("net")

        # Try to parse and normalise the network
        try:
            network = ipaddress.ip_network(network, strict=False)
        except ValueError as e:
            log.warning("Invalid IP network: %s: %s" % (network, e))
            return

        # One placeholder for each of the five columns
        self.db.execute("""
            INSERT INTO network_overrides(
                network,
                country,
                is_anonymous_proxy,
                is_satellite_provider,
                is_anycast
            ) VALUES (%s, %s, %s, %s, %s)
            ON CONFLICT (network) DO NOTHING""",
            "%s" % network,
            block.get("country"),
            block.get("is-anonymous-proxy") == "yes",
            block.get("is-satellite-provider") == "yes",
            block.get("is-anycast") == "yes",
        )

    def _import_autnum_override(self, block):
        """
        Insert one "autnum" override block into the autnum_overrides table.

        The AS number must be given as "AS" followed by a number; anything
        else is logged and skipped.
        """
        autnum = block.get("autnum")

        # Check if AS number is present and begins with "AS"
        if not autnum or not autnum.startswith("AS"):
            log.warning("Invalid AS number: %s" % autnum)
            return

        # Strip "AS" and make sure that the remainder is a number
        try:
            number = int(autnum[2:])
        except ValueError:
            log.warning("Invalid AS number: %s" % autnum)
            return

        self.db.execute("""
            INSERT INTO autnum_overrides(
                number,
                name,
                is_anonymous_proxy,
                is_satellite_provider,
                is_anycast
            ) VALUES(%s, %s, %s, %s, %s)
            ON CONFLICT DO NOTHING""",
            number, block.get("name"),
            block.get("is-anonymous-proxy") == "yes",
            block.get("is-satellite-provider") == "yes",
            block.get("is-anycast") == "yes",
        )
479
480
def split_line(line):
    """
    Split a "key: value" line at the first colon.

    Returns a (key, value) tuple with surrounding whitespace removed from
    both parts. The value is an empty string if the line has no colon.
    """
    key, _sep, val = line.partition(":")

    return key.strip(), val.strip()
489
def main():
    """
    Entry point of the script: create the command line interface and run it.
    """
    # Run the command line interface
    c = CLI()
    c.run()

# Only run the command line interface when executed as a script, so that
# importing this module has no side effects
if __name__ == "__main__":
    main()