]>
Commit | Line | Data |
---|---|---|
78ff0cf2 MT |
1 | #!/usr/bin/python3 |
2 | ############################################################################### | |
3 | # # | |
4 | # libloc - A library to determine the location of someone on the Internet # | |
5 | # # | |
1814283b | 6 | # Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> # |
78ff0cf2 MT |
7 | # # |
8 | # This library is free software; you can redistribute it and/or # | |
9 | # modify it under the terms of the GNU Lesser General Public # | |
10 | # License as published by the Free Software Foundation; either # | |
11 | # version 2.1 of the License, or (at your option) any later version. # | |
12 | # # | |
13 | # This library is distributed in the hope that it will be useful, # | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # | |
16 | # Lesser General Public License for more details. # | |
17 | # # | |
18 | ############################################################################### | |
19 | ||
20 | import argparse | |
6ffd06b5 | 21 | import ipaddress |
dcef2ba4 | 22 | import json |
78ff0cf2 | 23 | import logging |
6ffd06b5 MT |
24 | import math |
25 | import re | |
22d8d199 | 26 | import socket |
78ff0cf2 | 27 | import sys |
83d61c46 | 28 | import telnetlib |
78ff0cf2 MT |
29 | |
30 | # Load our location module | |
31 | import location | |
29c6fa22 | 32 | import location.database |
3192b66c | 33 | import location.importer |
78ff0cf2 MT |
34 | from location.i18n import _ |
35 | ||
36 | # Initialise logging | |
37 | log = logging.getLogger("location.importer") | |
38 | log.propagate = 1 | |
39 | ||
43fe570c PM |
40 | # Define constants |
41 | VALID_ASN_RANGES = ( | |
42 | (1, 23455), | |
43 | (23457, 64495), | |
44 | (131072, 4199999999), | |
45 | ) | |
46 | ||
47 | ||
78ff0cf2 MT |
48 | class CLI(object): |
49 | def parse_cli(self): | |
50 | parser = argparse.ArgumentParser( | |
51 | description=_("Location Importer Command Line Interface"), | |
52 | ) | |
6ffd06b5 | 53 | subparsers = parser.add_subparsers() |
78ff0cf2 MT |
54 | |
55 | # Global configuration flags | |
56 | parser.add_argument("--debug", action="store_true", | |
57 | help=_("Enable debug output")) | |
bc1f5f53 MT |
58 | parser.add_argument("--quiet", action="store_true", |
59 | help=_("Enable quiet mode")) | |
78ff0cf2 MT |
60 | |
61 | # version | |
62 | parser.add_argument("--version", action="version", | |
63 | version="%(prog)s @VERSION@") | |
64 | ||
29c6fa22 MT |
65 | # Database |
66 | parser.add_argument("--database-host", required=True, | |
67 | help=_("Database Hostname"), metavar=_("HOST")) | |
68 | parser.add_argument("--database-name", required=True, | |
69 | help=_("Database Name"), metavar=_("NAME")) | |
70 | parser.add_argument("--database-username", required=True, | |
71 | help=_("Database Username"), metavar=_("USERNAME")) | |
72 | parser.add_argument("--database-password", required=True, | |
73 | help=_("Database Password"), metavar=_("PASSWORD")) | |
74 | ||
0983f3dd MT |
75 | # Write Database |
76 | write = subparsers.add_parser("write", help=_("Write database to file")) | |
77 | write.set_defaults(func=self.handle_write) | |
78 | write.add_argument("file", nargs=1, help=_("Database File")) | |
79 | write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key")) | |
1164d876 | 80 | write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key")) |
0983f3dd MT |
81 | write.add_argument("--vendor", nargs="?", help=_("Sets the vendor")) |
82 | write.add_argument("--description", nargs="?", help=_("Sets a description")) | |
83 | write.add_argument("--license", nargs="?", help=_("Sets the license")) | |
b904896a | 84 | write.add_argument("--version", type=int, help=_("Database Format Version")) |
0983f3dd | 85 | |
6ffd06b5 MT |
86 | # Update WHOIS |
87 | update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information")) | |
88 | update_whois.set_defaults(func=self.handle_update_whois) | |
89 | ||
83d61c46 MT |
90 | # Update announcements |
91 | update_announcements = subparsers.add_parser("update-announcements", | |
92 | help=_("Update BGP Annoucements")) | |
93 | update_announcements.set_defaults(func=self.handle_update_announcements) | |
94 | update_announcements.add_argument("server", nargs=1, | |
95 | help=_("Route Server to connect to"), metavar=_("SERVER")) | |
96 | ||
d7fc3057 MT |
97 | # Update overrides |
98 | update_overrides = subparsers.add_parser("update-overrides", | |
99 | help=_("Update overrides"), | |
100 | ) | |
101 | update_overrides.add_argument( | |
102 | "files", nargs="+", help=_("Files to import"), | |
103 | ) | |
104 | update_overrides.set_defaults(func=self.handle_update_overrides) | |
105 | ||
8084b33a MT |
106 | # Import countries |
107 | import_countries = subparsers.add_parser("import-countries", | |
108 | help=_("Import countries"), | |
109 | ) | |
110 | import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"), | |
111 | help=_("File to import")) | |
112 | import_countries.set_defaults(func=self.handle_import_countries) | |
113 | ||
78ff0cf2 MT |
114 | args = parser.parse_args() |
115 | ||
bc1f5f53 | 116 | # Configure logging |
78ff0cf2 | 117 | if args.debug: |
f9de5e61 | 118 | location.logger.set_level(logging.DEBUG) |
bc1f5f53 MT |
119 | elif args.quiet: |
120 | location.logger.set_level(logging.WARNING) | |
78ff0cf2 | 121 | |
6ffd06b5 MT |
122 | # Print usage if no action was given |
123 | if not "func" in args: | |
124 | parser.print_usage() | |
125 | sys.exit(2) | |
126 | ||
78ff0cf2 MT |
127 | return args |
128 | ||
129 | def run(self): | |
130 | # Parse command line arguments | |
131 | args = self.parse_cli() | |
132 | ||
29c6fa22 | 133 | # Initialise database |
6ffd06b5 | 134 | self.db = self._setup_database(args) |
29c6fa22 | 135 | |
78ff0cf2 | 136 | # Call function |
6ffd06b5 | 137 | ret = args.func(args) |
78ff0cf2 MT |
138 | |
139 | # Return with exit code | |
140 | if ret: | |
141 | sys.exit(ret) | |
142 | ||
143 | # Otherwise just exit | |
144 | sys.exit(0) | |
145 | ||
29c6fa22 MT |
146 | def _setup_database(self, ns): |
147 | """ | |
148 | Initialise the database | |
149 | """ | |
150 | # Connect to database | |
151 | db = location.database.Connection( | |
152 | host=ns.database_host, database=ns.database_name, | |
153 | user=ns.database_username, password=ns.database_password, | |
154 | ) | |
155 | ||
156 | with db.transaction(): | |
157 | db.execute(""" | |
83d61c46 MT |
158 | -- announcements |
159 | CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint, | |
160 | first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, | |
161 | last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP); | |
162 | CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network); | |
163 | CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network)); | |
a1707d89 | 164 | CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops); |
83d61c46 | 165 | |
6ffd06b5 | 166 | -- autnums |
0983f3dd | 167 | CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL); |
26f06e70 | 168 | ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text; |
6ffd06b5 MT |
169 | CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number); |
170 | ||
8084b33a MT |
171 | -- countries |
172 | CREATE TABLE IF NOT EXISTS countries( | |
173 | country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL); | |
174 | CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code); | |
175 | ||
429a43d1 | 176 | -- networks |
83d61c46 | 177 | CREATE TABLE IF NOT EXISTS networks(network inet, country text); |
b6b2b331 | 178 | ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; |
26f06e70 | 179 | ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text; |
429a43d1 | 180 | CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); |
002deb6b | 181 | CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); |
83d61c46 | 182 | CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); |
d7fc3057 MT |
183 | |
184 | -- overrides | |
185 | CREATE TABLE IF NOT EXISTS autnum_overrides( | |
186 | number bigint NOT NULL, | |
187 | name text, | |
bd1aa6a1 | 188 | country text, |
b8e25b71 MT |
189 | is_anonymous_proxy boolean, |
190 | is_satellite_provider boolean, | |
191 | is_anycast boolean | |
d7fc3057 MT |
192 | ); |
193 | CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number | |
194 | ON autnum_overrides(number); | |
39ee3120 | 195 | ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text; |
e17e804e | 196 | ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean; |
d7fc3057 MT |
197 | |
198 | CREATE TABLE IF NOT EXISTS network_overrides( | |
199 | network inet NOT NULL, | |
200 | country text, | |
b8e25b71 MT |
201 | is_anonymous_proxy boolean, |
202 | is_satellite_provider boolean, | |
203 | is_anycast boolean | |
d7fc3057 MT |
204 | ); |
205 | CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network | |
206 | ON network_overrides(network); | |
991baf53 MT |
207 | CREATE INDEX IF NOT EXISTS network_overrides_search |
208 | ON network_overrides USING GIST(network inet_ops); | |
39ee3120 | 209 | ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text; |
e17e804e | 210 | ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean; |
29c6fa22 MT |
211 | """) |
212 | ||
213 | return db | |
214 | ||
0983f3dd MT |
215 | def handle_write(self, ns): |
216 | """ | |
217 | Compiles a database in libloc format out of what is in the database | |
218 | """ | |
0983f3dd | 219 | # Allocate a writer |
1164d876 | 220 | writer = location.Writer(ns.signing_key, ns.backup_signing_key) |
0983f3dd MT |
221 | |
222 | # Set all metadata | |
223 | if ns.vendor: | |
224 | writer.vendor = ns.vendor | |
225 | ||
226 | if ns.description: | |
227 | writer.description = ns.description | |
228 | ||
229 | if ns.license: | |
230 | writer.license = ns.license | |
231 | ||
232 | # Add all Autonomous Systems | |
233 | log.info("Writing Autonomous Systems...") | |
234 | ||
235 | # Select all ASes with a name | |
6e97c44b MT |
236 | rows = self.db.query(""" |
237 | SELECT | |
238 | autnums.number AS number, | |
239 | COALESCE( | |
240 | (SELECT overrides.name FROM autnum_overrides overrides | |
241 | WHERE overrides.number = autnums.number), | |
242 | autnums.name | |
243 | ) AS name | |
244 | FROM autnums | |
245 | WHERE name <> %s ORDER BY number | |
246 | """, "") | |
0983f3dd MT |
247 | |
248 | for row in rows: | |
249 | a = writer.add_as(row.number) | |
250 | a.name = row.name | |
251 | ||
252 | # Add all networks | |
253 | log.info("Writing networks...") | |
254 | ||
255 | # Select all known networks | |
256 | rows = self.db.query(""" | |
5372d9c7 MT |
257 | -- Return a list of those networks enriched with all |
258 | -- other information that we store in the database | |
0983f3dd | 259 | SELECT |
bbea93a7 MT |
260 | DISTINCT ON (network) |
261 | network, | |
262 | autnum, | |
bd1aa6a1 MT |
263 | |
264 | -- Country | |
265 | COALESCE( | |
266 | ( | |
267 | SELECT country FROM network_overrides overrides | |
bbea93a7 | 268 | WHERE networks.network <<= overrides.network |
bd1aa6a1 MT |
269 | ORDER BY masklen(overrides.network) DESC |
270 | LIMIT 1 | |
271 | ), | |
272 | ( | |
273 | SELECT country FROM autnum_overrides overrides | |
bbea93a7 | 274 | WHERE networks.autnum = overrides.number |
bd1aa6a1 MT |
275 | ), |
276 | networks.country | |
277 | ) AS country, | |
8e8555bb | 278 | |
0983f3dd | 279 | -- Flags |
1422b5d4 MT |
280 | COALESCE( |
281 | ( | |
282 | SELECT is_anonymous_proxy FROM network_overrides overrides | |
bbea93a7 | 283 | WHERE networks.network <<= overrides.network |
1422b5d4 MT |
284 | ORDER BY masklen(overrides.network) DESC |
285 | LIMIT 1 | |
286 | ), | |
287 | ( | |
288 | SELECT is_anonymous_proxy FROM autnum_overrides overrides | |
bbea93a7 | 289 | WHERE networks.autnum = overrides.number |
b8e25b71 MT |
290 | ), |
291 | FALSE | |
1422b5d4 MT |
292 | ) AS is_anonymous_proxy, |
293 | COALESCE( | |
294 | ( | |
295 | SELECT is_satellite_provider FROM network_overrides overrides | |
bbea93a7 | 296 | WHERE networks.network <<= overrides.network |
1422b5d4 MT |
297 | ORDER BY masklen(overrides.network) DESC |
298 | LIMIT 1 | |
299 | ), | |
300 | ( | |
301 | SELECT is_satellite_provider FROM autnum_overrides overrides | |
bbea93a7 | 302 | WHERE networks.autnum = overrides.number |
b8e25b71 MT |
303 | ), |
304 | FALSE | |
1422b5d4 MT |
305 | ) AS is_satellite_provider, |
306 | COALESCE( | |
307 | ( | |
308 | SELECT is_anycast FROM network_overrides overrides | |
bbea93a7 | 309 | WHERE networks.network <<= overrides.network |
1422b5d4 MT |
310 | ORDER BY masklen(overrides.network) DESC |
311 | LIMIT 1 | |
312 | ), | |
313 | ( | |
314 | SELECT is_anycast FROM autnum_overrides overrides | |
bbea93a7 | 315 | WHERE networks.autnum = overrides.number |
b8e25b71 MT |
316 | ), |
317 | FALSE | |
e17e804e PM |
318 | ) AS is_anycast, |
319 | COALESCE( | |
320 | ( | |
321 | SELECT is_drop FROM network_overrides overrides | |
322 | WHERE networks.network <<= overrides.network | |
323 | ORDER BY masklen(overrides.network) DESC | |
324 | LIMIT 1 | |
325 | ), | |
326 | ( | |
327 | SELECT is_drop FROM autnum_overrides overrides | |
328 | WHERE networks.autnum = overrides.number | |
329 | ), | |
330 | FALSE | |
331 | ) AS is_drop | |
bbea93a7 MT |
332 | FROM ( |
333 | SELECT | |
334 | known_networks.network AS network, | |
335 | announcements.autnum AS autnum, | |
336 | networks.country AS country, | |
337 | ||
338 | -- Must be part of returned values for ORDER BY clause | |
339 | masklen(announcements.network) AS sort_a, | |
340 | masklen(networks.network) AS sort_b | |
341 | FROM ( | |
342 | SELECT network FROM announcements | |
343 | UNION ALL | |
344 | SELECT network FROM networks | |
345 | UNION ALL | |
346 | SELECT network FROM network_overrides | |
347 | ) known_networks | |
348 | LEFT JOIN | |
349 | announcements ON known_networks.network <<= announcements.network | |
350 | LEFT JOIN | |
351 | networks ON known_networks.network <<= networks.network | |
352 | ORDER BY | |
353 | known_networks.network, | |
354 | sort_a DESC, | |
355 | sort_b DESC | |
356 | ) networks | |
0983f3dd MT |
357 | """) |
358 | ||
359 | for row in rows: | |
360 | network = writer.add_network(row.network) | |
361 | ||
5372d9c7 MT |
362 | # Save country |
363 | if row.country: | |
364 | network.country_code = row.country | |
365 | ||
366 | # Save ASN | |
367 | if row.autnum: | |
368 | network.asn = row.autnum | |
0983f3dd MT |
369 | |
370 | # Set flags | |
371 | if row.is_anonymous_proxy: | |
372 | network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY) | |
373 | ||
374 | if row.is_satellite_provider: | |
375 | network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER) | |
376 | ||
377 | if row.is_anycast: | |
378 | network.set_flag(location.NETWORK_FLAG_ANYCAST) | |
379 | ||
e17e804e PM |
380 | if row.is_drop: |
381 | network.set_flag(location.NETWORK_FLAG_DROP) | |
382 | ||
8084b33a MT |
383 | # Add all countries |
384 | log.info("Writing countries...") | |
385 | rows = self.db.query("SELECT * FROM countries ORDER BY country_code") | |
386 | ||
387 | for row in rows: | |
388 | c = writer.add_country(row.country_code) | |
389 | c.continent_code = row.continent_code | |
390 | c.name = row.name | |
391 | ||
0983f3dd MT |
392 | # Write everything to file |
393 | log.info("Writing database to file...") | |
394 | for file in ns.file: | |
395 | writer.write(file) | |
396 | ||
6ffd06b5 MT |
397 | def handle_update_whois(self, ns): |
398 | downloader = location.importer.Downloader() | |
399 | ||
400 | # Download all sources | |
0365119d MT |
401 | with self.db.transaction(): |
402 | # Create some temporary tables to store parsed data | |
403 | self.db.execute(""" | |
426e0bee | 404 | CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL) |
0365119d MT |
405 | ON COMMIT DROP; |
406 | CREATE UNIQUE INDEX _autnums_number ON _autnums(number); | |
407 | ||
426e0bee | 408 | CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL) |
0365119d MT |
409 | ON COMMIT DROP; |
410 | CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); | |
002deb6b | 411 | |
b6b2b331 | 412 | CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL) |
002deb6b PM |
413 | ON COMMIT DROP; |
414 | CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); | |
415 | CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); | |
416 | """) | |
417 | ||
418 | # Remove all previously imported content | |
419 | self.db.execute(""" | |
420 | TRUNCATE TABLE networks; | |
0365119d MT |
421 | """) |
422 | ||
84b175e2 PM |
423 | # Fetch all valid country codes to check parsed networks aganist... |
424 | rows = self.db.query("SELECT * FROM countries ORDER BY country_code") | |
425 | validcountries = [] | |
426 | ||
427 | for row in rows: | |
428 | validcountries.append(row.country_code) | |
429 | ||
28b08385 PM |
430 | for source_key in location.importer.WHOIS_SOURCES: |
431 | for single_url in location.importer.WHOIS_SOURCES[source_key]: | |
432 | with downloader.request(single_url, return_blocks=True) as f: | |
433 | for block in f: | |
434 | self._parse_block(block, source_key, validcountries) | |
6ffd06b5 | 435 | |
002deb6b PM |
436 | # Process all parsed networks from every RIR we happen to have access to, |
437 | # insert the largest network chunks into the networks table immediately... | |
438 | families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)") | |
439 | ||
440 | for family in (row.family for row in families): | |
441 | smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family) | |
442 | ||
b6b2b331 PM |
443 | self.db.execute("INSERT INTO networks(network, country, original_countries, source) \ |
444 | SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) | |
002deb6b PM |
445 | |
446 | # ... determine any other prefixes for this network family, ... | |
447 | prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ | |
448 | WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family) | |
449 | ||
450 | # ... and insert networks with this prefix in case they provide additional | |
451 | # information (i. e. subnet of a larger chunk with a different country) | |
452 | for prefix in (row.prefix for row in prefixes): | |
453 | self.db.execute(""" | |
454 | WITH candidates AS ( | |
455 | SELECT | |
456 | _rirdata.network, | |
28b08385 | 457 | _rirdata.country, |
b6b2b331 | 458 | _rirdata.original_countries, |
28b08385 | 459 | _rirdata.source |
002deb6b PM |
460 | FROM |
461 | _rirdata | |
462 | WHERE | |
463 | family(_rirdata.network) = %s | |
464 | AND | |
465 | masklen(_rirdata.network) = %s | |
466 | ), | |
467 | filtered AS ( | |
468 | SELECT | |
469 | DISTINCT ON (c.network) | |
470 | c.network, | |
471 | c.country, | |
b6b2b331 | 472 | c.original_countries, |
28b08385 | 473 | c.source, |
002deb6b PM |
474 | masklen(networks.network), |
475 | networks.country AS parent_country | |
476 | FROM | |
477 | candidates c | |
478 | LEFT JOIN | |
479 | networks | |
480 | ON | |
481 | c.network << networks.network | |
482 | ORDER BY | |
483 | c.network, | |
484 | masklen(networks.network) DESC NULLS LAST | |
485 | ) | |
486 | INSERT INTO | |
b6b2b331 | 487 | networks(network, country, original_countries, source) |
002deb6b PM |
488 | SELECT |
489 | network, | |
28b08385 | 490 | country, |
b6b2b331 | 491 | original_countries, |
28b08385 | 492 | source |
002deb6b PM |
493 | FROM |
494 | filtered | |
495 | WHERE | |
496 | parent_country IS NULL | |
497 | OR | |
498 | country <> parent_country | |
499 | ON CONFLICT DO NOTHING""", | |
500 | family, prefix, | |
501 | ) | |
502 | ||
0365119d | 503 | self.db.execute(""" |
28b08385 PM |
504 | INSERT INTO autnums(number, name, source) |
505 | SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums | |
2cd2e342 | 506 | JOIN _organizations ON _autnums.organization = _organizations.handle |
ee6ea398 | 507 | ON CONFLICT (number) DO UPDATE SET name = excluded.name; |
0365119d MT |
508 | """) |
509 | ||
429a43d1 | 510 | # Download all extended sources |
28b08385 PM |
511 | for source_key in location.importer.EXTENDED_SOURCES: |
512 | for single_url in location.importer.EXTENDED_SOURCES[source_key]: | |
513 | with self.db.transaction(): | |
514 | # Download data | |
515 | with downloader.request(single_url) as f: | |
516 | for line in f: | |
517 | self._parse_line(line, source_key, validcountries) | |
429a43d1 | 518 | |
92403f39 PM |
519 | # Download and import (technical) AS names from ARIN |
520 | self._import_as_names_from_arin() | |
521 | ||
bd341642 PM |
522 | def _check_parsed_network(self, network): |
523 | """ | |
524 | Assistive function to detect and subsequently sort out parsed | |
525 | networks from RIR data (both Whois and so-called "extended sources"), | |
526 | which are or have... | |
527 | ||
528 | (a) not globally routable (RFC 1918 space, et al.) | |
529 | (b) covering a too large chunk of the IP address space (prefix length | |
530 | is < 7 for IPv4 networks, and < 10 for IPv6) | |
531 | (c) "0.0.0.0" or "::" as a network address | |
532 | (d) are too small for being publicly announced (we have decided not to | |
533 | process them at the moment, as they significantly enlarge our | |
534 | database without providing very helpful additional information) | |
535 | ||
536 | This unfortunately is necessary due to brain-dead clutter across | |
537 | various RIR databases, causing mismatches and eventually disruptions. | |
538 | ||
539 | We will return False in case a network is not suitable for adding | |
540 | it to our database, and True otherwise. | |
541 | """ | |
542 | ||
543 | if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)): | |
544 | return False | |
545 | ||
546 | if not network.is_global: | |
2ba6ed07 | 547 | log.debug("Skipping non-globally routable network: %s" % network) |
bd341642 PM |
548 | return False |
549 | ||
550 | if network.version == 4: | |
551 | if network.prefixlen < 7: | |
2ba6ed07 | 552 | log.debug("Skipping too big IP chunk: %s" % network) |
bd341642 PM |
553 | return False |
554 | ||
555 | if network.prefixlen > 24: | |
ebb087cf | 556 | log.debug("Skipping network too small to be publicly announced: %s" % network) |
bd341642 PM |
557 | return False |
558 | ||
559 | if str(network.network_address) == "0.0.0.0": | |
2ba6ed07 | 560 | log.debug("Skipping network based on 0.0.0.0: %s" % network) |
bd341642 PM |
561 | return False |
562 | ||
563 | elif network.version == 6: | |
564 | if network.prefixlen < 10: | |
2ba6ed07 | 565 | log.debug("Skipping too big IP chunk: %s" % network) |
bd341642 PM |
566 | return False |
567 | ||
568 | if network.prefixlen > 48: | |
ebb087cf | 569 | log.debug("Skipping network too small to be publicly announced: %s" % network) |
bd341642 PM |
570 | return False |
571 | ||
572 | if str(network.network_address) == "::": | |
2ba6ed07 | 573 | log.debug("Skipping network based on '::': %s" % network) |
bd341642 PM |
574 | return False |
575 | ||
576 | else: | |
577 | # This should not happen... | |
84187ab5 | 578 | log.warning("Skipping network of unknown family, this should not happen: %s" % network) |
bd341642 PM |
579 | return False |
580 | ||
581 | # In case we have made it here, the network is considered to | |
582 | # be suitable for libloc consumption... | |
583 | return True | |
584 | ||
43fe570c PM |
585 | def _check_parsed_asn(self, asn): |
586 | """ | |
587 | Assistive function to filter Autonomous System Numbers not being suitable | |
588 | for adding to our database. Returns False in such cases, and True otherwise. | |
589 | """ | |
590 | ||
591 | for start, end in VALID_ASN_RANGES: | |
592 | if start <= asn and end >= asn: | |
593 | return True | |
594 | ||
595 | log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn) | |
596 | return False | |
597 | ||
28b08385 | 598 | def _parse_block(self, block, source_key, validcountries = None): |
6ffd06b5 MT |
599 | # Get first line to find out what type of block this is |
600 | line = block[0] | |
601 | ||
6ffd06b5 | 602 | # aut-num |
429a43d1 | 603 | if line.startswith("aut-num:"): |
28b08385 | 604 | return self._parse_autnum_block(block, source_key) |
6ffd06b5 | 605 | |
aadac4c5 PM |
606 | # inetnum |
607 | if line.startswith("inet6num:") or line.startswith("inetnum:"): | |
28b08385 | 608 | return self._parse_inetnum_block(block, source_key, validcountries) |
aadac4c5 | 609 | |
6ffd06b5 MT |
610 | # organisation |
611 | elif line.startswith("organisation:"): | |
28b08385 | 612 | return self._parse_org_block(block, source_key) |
6ffd06b5 | 613 | |
28b08385 | 614 | def _parse_autnum_block(self, block, source_key): |
6ffd06b5 MT |
615 | autnum = {} |
616 | for line in block: | |
617 | # Split line | |
618 | key, val = split_line(line) | |
619 | ||
620 | if key == "aut-num": | |
621 | m = re.match(r"^(AS|as)(\d+)", val) | |
622 | if m: | |
623 | autnum["asn"] = m.group(2) | |
624 | ||
0365119d | 625 | elif key == "org": |
e7d612e5 | 626 | autnum[key] = val.upper() |
6ffd06b5 | 627 | |
426e0bee PM |
628 | elif key == "descr": |
629 | # Save the first description line as well... | |
630 | if not key in autnum: | |
631 | autnum[key] = val | |
632 | ||
6ffd06b5 | 633 | # Skip empty objects |
426e0bee | 634 | if not autnum or not "asn" in autnum: |
6ffd06b5 MT |
635 | return |
636 | ||
426e0bee PM |
637 | # Insert a dummy organisation handle into our temporary organisations |
638 | # table in case the AS does not have an organisation handle set, but | |
639 | # has a description (a quirk often observed in APNIC area), so we can | |
640 | # later display at least some string for this AS. | |
641 | if not "org" in autnum: | |
642 | if "descr" in autnum: | |
643 | autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn") | |
644 | ||
645 | self.db.execute("INSERT INTO _organizations(handle, name, source) \ | |
646 | VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING", | |
647 | autnum.get("org"), autnum.get("descr"), source_key, | |
648 | ) | |
649 | else: | |
650 | log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \ | |
651 | autnum.get("asn")) | |
652 | return | |
653 | ||
6ffd06b5 | 654 | # Insert into database |
28b08385 PM |
655 | self.db.execute("INSERT INTO _autnums(number, organization, source) \ |
656 | VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \ | |
0365119d | 657 | organization = excluded.organization", |
28b08385 | 658 | autnum.get("asn"), autnum.get("org"), source_key, |
6ffd06b5 MT |
659 | ) |
660 | ||
28b08385 | 661 | def _parse_inetnum_block(self, block, source_key, validcountries = None): |
84187ab5 | 662 | log.debug("Parsing inetnum block:") |
aadac4c5 PM |
663 | |
664 | inetnum = {} | |
665 | for line in block: | |
84187ab5 | 666 | log.debug(line) |
aadac4c5 PM |
667 | |
668 | # Split line | |
669 | key, val = split_line(line) | |
670 | ||
84187ab5 PM |
671 | # Filter any inetnum records which are only referring to IP space |
672 | # not managed by that specific RIR... | |
673 | if key == "netname": | |
5254e5fc | 674 | if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()): |
2ba6ed07 | 675 | log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip()) |
84187ab5 PM |
676 | return |
677 | ||
aadac4c5 PM |
678 | if key == "inetnum": |
679 | start_address, delim, end_address = val.partition("-") | |
680 | ||
681 | # Strip any excess space | |
682 | start_address, end_address = start_address.rstrip(), end_address.strip() | |
683 | ||
845da577 PM |
684 | # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22") |
685 | if start_address and not (delim or end_address): | |
686 | try: | |
687 | start_address = ipaddress.ip_network(start_address, strict=False) | |
688 | except ValueError: | |
689 | start_address = start_address.split("/") | |
38d0faea | 690 | ldigits = start_address[0].count(".") |
845da577 PM |
691 | |
692 | # How many octets do we need to add? | |
693 | # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.) | |
694 | if ldigits == 2: | |
695 | start_address = start_address[0] + ".0.0/" + start_address[1] | |
696 | elif ldigits == 3: | |
697 | start_address = start_address[0] + ".0/" + start_address[1] | |
698 | else: | |
699 | log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line) | |
700 | return | |
701 | ||
702 | try: | |
703 | start_address = ipaddress.ip_network(start_address, strict=False) | |
704 | except ValueError: | |
705 | log.warning("Could not parse line in LACNIC DB format: %s" % line) | |
706 | return | |
707 | ||
708 | # Enumerate first and last IP address of this network | |
709 | end_address = start_address[-1] | |
710 | start_address = start_address[0] | |
711 | ||
712 | else: | |
713 | # Convert to IP address | |
714 | try: | |
715 | start_address = ipaddress.ip_address(start_address) | |
716 | end_address = ipaddress.ip_address(end_address) | |
717 | except ValueError: | |
718 | log.warning("Could not parse line: %s" % line) | |
719 | return | |
aadac4c5 | 720 | |
1814283b | 721 | inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address)) |
aadac4c5 PM |
722 | |
723 | elif key == "inet6num": | |
1814283b | 724 | inetnum[key] = [ipaddress.ip_network(val, strict=False)] |
aadac4c5 PM |
725 | |
726 | elif key == "country": | |
7434e5e0 PM |
727 | val = val.upper() |
728 | ||
b6b2b331 | 729 | # Catch RIR data objects with more than one country code... |
7434e5e0 | 730 | if not key in inetnum: |
b6b2b331 PM |
731 | inetnum[key] = [] |
732 | else: | |
7434e5e0 | 733 | if val in inetnum.get("country"): |
b6b2b331 PM |
734 | # ... but keep this list distinct... |
735 | continue | |
736 | ||
01e5f0ff PM |
737 | # When people set country codes to "UK", they actually mean "GB" |
738 | if val == "UK": | |
739 | val = "GB" | |
740 | ||
7434e5e0 | 741 | inetnum[key].append(val) |
aadac4c5 PM |
742 | |
743 | # Skip empty objects | |
002deb6b | 744 | if not inetnum or not "country" in inetnum: |
aadac4c5 PM |
745 | return |
746 | ||
b6b2b331 PM |
747 | # Prepare skipping objects with unknown country codes... |
748 | invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries] | |
749 | ||
1814283b PM |
750 | # Iterate through all networks enumerated from above, check them for plausibility and insert |
751 | # them into the database, if _check_parsed_network() succeeded | |
752 | for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): | |
753 | if self._check_parsed_network(single_network): | |
7138b4ac | 754 | |
b6b2b331 PM |
755 | # Skip objects with unknown country codes if they are valid to avoid log spam... |
756 | if validcountries and invalidcountries: | |
757 | log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \ | |
758 | (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) | |
48770ca8 | 759 | break |
7138b4ac PM |
760 | |
761 | # Everything is fine here, run INSERT statement... | |
b6b2b331 PM |
762 | self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \ |
763 | VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", | |
764 | "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key, | |
1814283b | 765 | ) |
aadac4c5 | 766 | |
28b08385 | 767 | def _parse_org_block(self, block, source_key): |
6ffd06b5 MT |
768 | org = {} |
769 | for line in block: | |
770 | # Split line | |
771 | key, val = split_line(line) | |
772 | ||
e7d612e5 PM |
773 | if key == "organisation": |
774 | org[key] = val.upper() | |
775 | elif key == "org-name": | |
6ffd06b5 MT |
776 | org[key] = val |
777 | ||
778 | # Skip empty objects | |
779 | if not org: | |
780 | return | |
781 | ||
28b08385 PM |
782 | self.db.execute("INSERT INTO _organizations(handle, name, source) \ |
783 | VALUES(%s, %s, %s) ON CONFLICT (handle) DO \ | |
0365119d | 784 | UPDATE SET name = excluded.name", |
28b08385 | 785 | org.get("organisation"), org.get("org-name"), source_key, |
6ffd06b5 MT |
786 | ) |
787 | ||
28b08385 | 788 | def _parse_line(self, line, source_key, validcountries = None): |
429a43d1 MT |
789 | # Skip version line |
790 | if line.startswith("2"): | |
791 | return | |
6ffd06b5 | 792 | |
429a43d1 MT |
793 | # Skip comments |
794 | if line.startswith("#"): | |
795 | return | |
6ffd06b5 | 796 | |
429a43d1 MT |
797 | try: |
798 | registry, country_code, type, line = line.split("|", 3) | |
799 | except: | |
800 | log.warning("Could not parse line: %s" % line) | |
801 | return | |
6ffd06b5 | 802 | |
84b175e2 PM |
803 | # Skip any lines that are for stats only or do not have a country |
804 | # code at all (avoids log spam below) | |
805 | if not country_code or country_code == '*': | |
806 | return | |
807 | ||
808 | # Skip objects with unknown country codes | |
809 | if validcountries and country_code not in validcountries: | |
810 | log.warning("Skipping line with bogus country '%s': %s" % \ | |
811 | (country_code, line)) | |
6ffd06b5 MT |
812 | return |
813 | ||
429a43d1 | 814 | if type in ("ipv6", "ipv4"): |
28b08385 | 815 | return self._parse_ip_line(country_code, type, line, source_key) |
429a43d1 | 816 | |
28b08385 | 817 | def _parse_ip_line(self, country, type, line, source_key): |
429a43d1 MT |
818 | try: |
819 | address, prefix, date, status, organization = line.split("|") | |
820 | except ValueError: | |
821 | organization = None | |
822 | ||
823 | # Try parsing the line without organization | |
824 | try: | |
825 | address, prefix, date, status = line.split("|") | |
826 | except ValueError: | |
827 | log.warning("Unhandled line format: %s" % line) | |
828 | return | |
829 | ||
830 | # Skip anything that isn't properly assigned | |
831 | if not status in ("assigned", "allocated"): | |
832 | return | |
833 | ||
834 | # Cast prefix into an integer | |
835 | try: | |
836 | prefix = int(prefix) | |
837 | except: | |
838 | log.warning("Invalid prefix: %s" % prefix) | |
7177031f | 839 | return |
429a43d1 MT |
840 | |
841 | # Fix prefix length for IPv4 | |
842 | if type == "ipv4": | |
843 | prefix = 32 - int(math.log(prefix, 2)) | |
844 | ||
845 | # Try to parse the address | |
846 | try: | |
847 | network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False) | |
848 | except ValueError: | |
849 | log.warning("Invalid IP address: %s" % address) | |
850 | return | |
851 | ||
bd341642 PM |
852 | if not self._check_parsed_network(network): |
853 | return | |
854 | ||
b6b2b331 PM |
855 | self.db.execute("INSERT INTO networks(network, country, original_countries, source) \ |
856 | VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \ | |
87b3e102 | 857 | UPDATE SET country = excluded.country", |
b6b2b331 | 858 | "%s" % network, country, [country], source_key, |
6ffd06b5 MT |
859 | ) |
860 | ||
92403f39 PM |
861 | def _import_as_names_from_arin(self): |
862 | downloader = location.importer.Downloader() | |
863 | ||
864 | # XXX: Download AS names file from ARIN (note that these names appear to be quite | |
865 | # technical, not intended for human consumption, as description fields in | |
866 | # organisation handles for other RIRs are - however, this is what we have got, | |
867 | # and in some cases, it might be still better than nothing) | |
868 | with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f: | |
869 | for line in f: | |
870 | # Convert binary line to string... | |
871 | line = str(line) | |
872 | ||
873 | # ... valid lines start with a space, followed by the number of the Autonomous System ... | |
874 | if not line.startswith(" "): | |
875 | continue | |
876 | ||
877 | # Split line and check if there is a valid ASN in it... | |
878 | asn, name = line.split()[0:2] | |
879 | ||
880 | try: | |
881 | asn = int(asn) | |
882 | except ValueError: | |
883 | log.debug("Skipping ARIN AS names line not containing an integer for ASN") | |
884 | continue | |
885 | ||
43fe570c PM |
886 | # Filter invalid ASNs... |
887 | if not self._check_parsed_asn(asn): | |
92403f39 PM |
888 | continue |
889 | ||
890 | # Skip any AS name that appears to be a placeholder for a different RIR or entity... | |
378b4c0a | 891 | if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name): |
92403f39 PM |
892 | continue |
893 | ||
894 | # Bail out in case the AS name contains anything we do not expect here... | |
895 | if re.search(r"[^a-zA-Z0-9-_]", name): | |
896 | log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \ | |
897 | (asn, name)) | |
898 | ||
899 | # Things look good here, run INSERT statement and skip this one if we already have | |
900 | # a (better?) name for this Autonomous System... | |
901 | self.db.execute(""" | |
902 | INSERT INTO autnums( | |
903 | number, | |
904 | name, | |
905 | source | |
906 | ) VALUES (%s, %s, %s) | |
907 | ON CONFLICT (number) DO NOTHING""", | |
908 | asn, | |
909 | name, | |
910 | "ARIN", | |
911 | ) | |
912 | ||
83d61c46 MT |
913 | def handle_update_announcements(self, ns): |
914 | server = ns.server[0] | |
915 | ||
22d8d199 MT |
916 | with self.db.transaction(): |
917 | if server.startswith("/"): | |
918 | self._handle_update_announcements_from_bird(server) | |
919 | else: | |
920 | self._handle_update_announcements_from_telnet(server) | |
921 | ||
922 | # Purge anything we never want here | |
923 | self.db.execute(""" | |
924 | -- Delete default routes | |
925 | DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0'; | |
926 | ||
927 | -- Delete anything that is not global unicast address space | |
928 | DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3'; | |
929 | ||
930 | -- DELETE "current network" address space | |
931 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8'; | |
932 | ||
933 | -- DELETE local loopback address space | |
934 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8'; | |
935 | ||
936 | -- DELETE RFC 1918 address space | |
937 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8'; | |
938 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12'; | |
939 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16'; | |
940 | ||
941 | -- DELETE test, benchmark and documentation address space | |
942 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24'; | |
943 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24'; | |
944 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15'; | |
945 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24'; | |
946 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24'; | |
947 | ||
948 | -- DELETE CGNAT address space (RFC 6598) | |
949 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10'; | |
950 | ||
951 | -- DELETE link local address space | |
952 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16'; | |
953 | ||
b4d5b2a6 | 954 | -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068) |
22d8d199 | 955 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24'; |
b4d5b2a6 | 956 | DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16'; |
22d8d199 MT |
957 | |
958 | -- DELETE multicast and reserved address space | |
959 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4'; | |
960 | DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4'; | |
961 | ||
962 | -- Delete networks that are too small to be in the global routing table | |
963 | DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48; | |
964 | DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24; | |
965 | ||
966 | -- Delete any non-public or reserved ASNs | |
967 | DELETE FROM announcements WHERE NOT ( | |
968 | (autnum >= 1 AND autnum <= 23455) | |
969 | OR | |
970 | (autnum >= 23457 AND autnum <= 64495) | |
971 | OR | |
972 | (autnum >= 131072 AND autnum <= 4199999999) | |
973 | ); | |
974 | ||
975 | -- Delete everything that we have not seen for 14 days | |
976 | DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days'; | |
977 | """) | |
978 | ||
979 | def _handle_update_announcements_from_bird(self, server): | |
980 | # Pre-compile the regular expression for faster searching | |
dc0be5c5 | 981 | route = re.compile(b"^\s(.+?)\s+.+?\[AS(.*?).\]$") |
22d8d199 MT |
982 | |
983 | log.info("Requesting routing table from Bird (%s)" % server) | |
984 | ||
985 | # Send command to list all routes | |
986 | for line in self._bird_cmd(server, "show route"): | |
987 | m = route.match(line) | |
988 | if not m: | |
989 | log.debug("Could not parse line: %s" % line.decode()) | |
990 | continue | |
991 | ||
992 | # Fetch the extracted network and ASN | |
993 | network, autnum = m.groups() | |
994 | ||
995 | # Insert it into the database | |
996 | self.db.execute("INSERT INTO announcements(network, autnum) \ | |
997 | VALUES(%s, %s) ON CONFLICT (network) DO \ | |
998 | UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", | |
999 | network.decode(), autnum.decode(), | |
1000 | ) | |
1001 | ||
1002 | def _handle_update_announcements_from_telnet(self, server): | |
83d61c46 | 1003 | # Pre-compile regular expression for routes |
83d61c46 MT |
1004 | route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL) |
1005 | ||
1006 | with telnetlib.Telnet(server) as t: | |
1007 | # Enable debug mode | |
1008 | #if ns.debug: | |
1009 | # t.set_debuglevel(10) | |
1010 | ||
1011 | # Wait for console greeting | |
fcd5b8b2 MT |
1012 | greeting = t.read_until(b"> ", timeout=30) |
1013 | if not greeting: | |
1014 | log.error("Could not get a console prompt") | |
1015 | return 1 | |
83d61c46 MT |
1016 | |
1017 | # Disable pagination | |
1018 | t.write(b"terminal length 0\n") | |
1019 | ||
1020 | # Wait for the prompt to return | |
1021 | t.read_until(b"> ") | |
1022 | ||
1023 | # Fetch the routing tables | |
22d8d199 MT |
1024 | for protocol in ("ipv6", "ipv4"): |
1025 | log.info("Requesting %s routing table" % protocol) | |
83d61c46 | 1026 | |
22d8d199 MT |
1027 | # Request the full unicast routing table |
1028 | t.write(b"show bgp %s unicast\n" % protocol.encode()) | |
83d61c46 | 1029 | |
22d8d199 MT |
1030 | # Read entire header which ends with "Path" |
1031 | t.read_until(b"Path\r\n") | |
83d61c46 | 1032 | |
22d8d199 MT |
1033 | while True: |
1034 | # Try reading a full entry | |
1035 | # Those might be broken across multiple lines but ends with i | |
1036 | line = t.read_until(b"i\r\n", timeout=5) | |
1037 | if not line: | |
1038 | break | |
83d61c46 | 1039 | |
22d8d199 MT |
1040 | # Show line for debugging |
1041 | #log.debug(repr(line)) | |
d773c1bc | 1042 | |
22d8d199 MT |
1043 | # Try finding a route in here |
1044 | m = route.match(line) | |
1045 | if m: | |
1046 | network, autnum = m.groups() | |
83d61c46 | 1047 | |
22d8d199 MT |
1048 | # Convert network to string |
1049 | network = network.decode() | |
83d61c46 | 1050 | |
22d8d199 MT |
1051 | # Append /24 for IPv4 addresses |
1052 | if not "/" in network and not ":" in network: | |
1053 | network = "%s/24" % network | |
83d61c46 | 1054 | |
22d8d199 MT |
1055 | # Convert AS number to integer |
1056 | autnum = int(autnum) | |
83d61c46 | 1057 | |
22d8d199 | 1058 | log.info("Found announcement for %s by %s" % (network, autnum)) |
83d61c46 | 1059 | |
22d8d199 MT |
1060 | self.db.execute("INSERT INTO announcements(network, autnum) \ |
1061 | VALUES(%s, %s) ON CONFLICT (network) DO \ | |
1062 | UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", | |
1063 | network, autnum, | |
1064 | ) | |
83d61c46 | 1065 | |
22d8d199 | 1066 | log.info("Finished reading the %s routing table" % protocol) |
1d4e4e8f | 1067 | |
22d8d199 MT |
1068 | def _bird_cmd(self, socket_path, command): |
1069 | # Connect to the socket | |
1070 | s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) | |
1071 | s.connect(socket_path) | |
cedee656 | 1072 | |
22d8d199 MT |
1073 | # Allocate some buffer |
1074 | buffer = b"" | |
83d61c46 | 1075 | |
22d8d199 MT |
1076 | # Send the command |
1077 | s.send(b"%s\n" % command.encode()) | |
209c04b6 | 1078 | |
22d8d199 MT |
1079 | while True: |
1080 | # Fill up the buffer | |
1081 | buffer += s.recv(4096) | |
209c04b6 | 1082 | |
22d8d199 MT |
1083 | while True: |
1084 | # Search for the next newline | |
1085 | pos = buffer.find(b"\n") | |
209c04b6 | 1086 | |
22d8d199 MT |
1087 | # If we cannot find one, we go back and read more data |
1088 | if pos <= 0: | |
1089 | break | |
209c04b6 | 1090 | |
22d8d199 MT |
1091 | # Cut after the newline character |
1092 | pos += 1 | |
b89cee80 | 1093 | |
22d8d199 MT |
1094 | # Split the line we want and keep the rest in buffer |
1095 | line, buffer = buffer[:pos], buffer[pos:] | |
83d61c46 | 1096 | |
22d8d199 MT |
1097 | # Look for the end-of-output indicator |
1098 | if line == b"0000 \n": | |
1099 | return | |
83d61c46 | 1100 | |
22d8d199 MT |
1101 | # Otherwise return the line |
1102 | yield line | |
83d61c46 | 1103 | |
d7fc3057 MT |
1104 | def handle_update_overrides(self, ns): |
1105 | with self.db.transaction(): | |
1106 | # Drop all data that we have | |
1107 | self.db.execute(""" | |
1108 | TRUNCATE TABLE autnum_overrides; | |
1109 | TRUNCATE TABLE network_overrides; | |
1110 | """) | |
1111 | ||
dcef2ba4 PM |
1112 | # Update overrides for various cloud providers big enough to publish their own IP |
1113 | # network allocation lists in a machine-readable format... | |
1114 | self._update_overrides_for_aws() | |
1115 | ||
69b3d894 PM |
1116 | # Update overrides for Spamhaus DROP feeds... |
1117 | self._update_overrides_for_spamhaus_drop() | |
1118 | ||
d7fc3057 MT |
1119 | for file in ns.files: |
1120 | log.info("Reading %s..." % file) | |
1121 | ||
1122 | with open(file, "rb") as f: | |
1123 | for type, block in location.importer.read_blocks(f): | |
1124 | if type == "net": | |
1125 | network = block.get("net") | |
1126 | # Try to parse and normalise the network | |
1127 | try: | |
1128 | network = ipaddress.ip_network(network, strict=False) | |
1129 | except ValueError as e: | |
1130 | log.warning("Invalid IP network: %s: %s" % (network, e)) | |
1131 | continue | |
1132 | ||
94dfab8c MT |
1133 | # Prevent that we overwrite all networks |
1134 | if network.prefixlen == 0: | |
1135 | log.warning("Skipping %s: You cannot overwrite default" % network) | |
1136 | continue | |
1137 | ||
d7fc3057 MT |
1138 | self.db.execute(""" |
1139 | INSERT INTO network_overrides( | |
1140 | network, | |
1141 | country, | |
39ee3120 | 1142 | source, |
d7fc3057 MT |
1143 | is_anonymous_proxy, |
1144 | is_satellite_provider, | |
e17e804e PM |
1145 | is_anycast, |
1146 | is_drop | |
39ee3120 | 1147 | ) VALUES (%s, %s, %s, %s, %s, %s, %s) |
d7fc3057 MT |
1148 | ON CONFLICT (network) DO NOTHING""", |
1149 | "%s" % network, | |
1150 | block.get("country"), | |
39ee3120 | 1151 | "manual", |
28d29b7c MT |
1152 | self._parse_bool(block, "is-anonymous-proxy"), |
1153 | self._parse_bool(block, "is-satellite-provider"), | |
1154 | self._parse_bool(block, "is-anycast"), | |
e17e804e | 1155 | self._parse_bool(block, "drop"), |
d7fc3057 MT |
1156 | ) |
1157 | ||
f476cdfd MT |
1158 | elif type == "aut-num": |
1159 | autnum = block.get("aut-num") | |
d7fc3057 MT |
1160 | |
1161 | # Check if AS number begins with "AS" | |
1162 | if not autnum.startswith("AS"): | |
1163 | log.warning("Invalid AS number: %s" % autnum) | |
1164 | continue | |
1165 | ||
1166 | # Strip "AS" | |
1167 | autnum = autnum[2:] | |
1168 | ||
1169 | self.db.execute(""" | |
1170 | INSERT INTO autnum_overrides( | |
1171 | number, | |
1172 | name, | |
bd1aa6a1 | 1173 | country, |
39ee3120 | 1174 | source, |
d7fc3057 MT |
1175 | is_anonymous_proxy, |
1176 | is_satellite_provider, | |
e17e804e PM |
1177 | is_anycast, |
1178 | is_drop | |
39ee3120 | 1179 | ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s) |
d7fc3057 | 1180 | ON CONFLICT DO NOTHING""", |
bd1aa6a1 MT |
1181 | autnum, |
1182 | block.get("name"), | |
1183 | block.get("country"), | |
39ee3120 | 1184 | "manual", |
28d29b7c MT |
1185 | self._parse_bool(block, "is-anonymous-proxy"), |
1186 | self._parse_bool(block, "is-satellite-provider"), | |
1187 | self._parse_bool(block, "is-anycast"), | |
e17e804e | 1188 | self._parse_bool(block, "drop"), |
d7fc3057 MT |
1189 | ) |
1190 | ||
1191 | else: | |
03d24a9b | 1192 | log.warning("Unsupported type: %s" % type) |
d7fc3057 | 1193 | |
dcef2ba4 PM |
1194 | def _update_overrides_for_aws(self): |
1195 | # Download Amazon AWS IP allocation file to create overrides... | |
1196 | downloader = location.importer.Downloader() | |
1197 | ||
1198 | try: | |
1199 | with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f: | |
1200 | aws_ip_dump = json.load(f.body) | |
1201 | except Exception as e: | |
1202 | log.error("unable to preprocess Amazon AWS IP ranges: %s" % e) | |
1203 | return | |
1204 | ||
1205 | # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately, | |
1206 | # there seems to be no machine-readable version available of this other than | |
1207 | # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html | |
1208 | # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints | |
1209 | # was helpful here as well. | |
1210 | aws_region_country_map = { | |
1211 | "af-south-1": "ZA", | |
1212 | "ap-east-1": "HK", | |
1213 | "ap-south-1": "IN", | |
1214 | "ap-south-2": "IN", | |
1215 | "ap-northeast-3": "JP", | |
1216 | "ap-northeast-2": "KR", | |
1217 | "ap-southeast-1": "SG", | |
1218 | "ap-southeast-2": "AU", | |
1219 | "ap-southeast-3": "MY", | |
1220 | "ap-southeast-4": "AU", | |
1221 | "ap-northeast-1": "JP", | |
1222 | "ca-central-1": "CA", | |
1223 | "eu-central-1": "DE", | |
1224 | "eu-central-2": "CH", | |
1225 | "eu-west-1": "IE", | |
1226 | "eu-west-2": "GB", | |
1227 | "eu-south-1": "IT", | |
1228 | "eu-south-2": "ES", | |
1229 | "eu-west-3": "FR", | |
1230 | "eu-north-1": "SE", | |
ad017947 | 1231 | "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself |
dcef2ba4 PM |
1232 | "me-central-1": "AE", |
1233 | "me-south-1": "BH", | |
1234 | "sa-east-1": "BR" | |
1235 | } | |
1236 | ||
1237 | # Fetch all valid country codes to check parsed networks aganist... | |
1238 | rows = self.db.query("SELECT * FROM countries ORDER BY country_code") | |
1239 | validcountries = [] | |
1240 | ||
1241 | for row in rows: | |
1242 | validcountries.append(row.country_code) | |
1243 | ||
1244 | with self.db.transaction(): | |
1245 | for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]: | |
1246 | try: | |
1247 | network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False) | |
1248 | except ValueError: | |
1249 | log.warning("Unable to parse line: %s" % snetwork) | |
1250 | continue | |
1251 | ||
1252 | # Sanitize parsed networks... | |
1253 | if not self._check_parsed_network(network): | |
1254 | continue | |
1255 | ||
1256 | # Determine region of this network... | |
1257 | region = snetwork["region"] | |
1258 | cc = None | |
1259 | is_anycast = False | |
1260 | ||
1261 | # Any region name starting with "us-" will get "US" country code assigned straight away... | |
1262 | if region.startswith("us-"): | |
1263 | cc = "US" | |
1264 | elif region.startswith("cn-"): | |
1265 | # ... same goes for China ... | |
1266 | cc = "CN" | |
1267 | elif region == "GLOBAL": | |
1268 | # ... funny region name for anycast-like networks ... | |
1269 | is_anycast = True | |
1270 | elif region in aws_region_country_map: | |
1271 | # ... assign looked up country code otherwise ... | |
1272 | cc = aws_region_country_map[region] | |
1273 | else: | |
1274 | # ... and bail out if we are missing something here | |
1275 | log.warning("Unable to determine country code for line: %s" % snetwork) | |
1276 | continue | |
1277 | ||
1278 | # Skip networks with unknown country codes | |
1279 | if not is_anycast and validcountries and cc not in validcountries: | |
1280 | log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \ | |
1281 | (cc, network)) | |
1282 | return | |
1283 | ||
1284 | # Conduct SQL statement... | |
1285 | self.db.execute(""" | |
1286 | INSERT INTO network_overrides( | |
1287 | network, | |
1288 | country, | |
1289 | source, | |
1290 | is_anonymous_proxy, | |
1291 | is_satellite_provider, | |
1292 | is_anycast | |
1293 | ) VALUES (%s, %s, %s, %s, %s, %s) | |
1294 | ON CONFLICT (network) DO NOTHING""", | |
1295 | "%s" % network, | |
1296 | cc, | |
1297 | "Amazon AWS IP feed", | |
1298 | None, | |
1299 | None, | |
1300 | is_anycast, | |
1301 | ) | |
1302 | ||
1303 | ||
69b3d894 PM |
1304 | def _update_overrides_for_spamhaus_drop(self): |
1305 | downloader = location.importer.Downloader() | |
1306 | ||
1307 | ip_urls = [ | |
1308 | "https://www.spamhaus.org/drop/drop.txt", | |
1309 | "https://www.spamhaus.org/drop/edrop.txt", | |
1310 | "https://www.spamhaus.org/drop/dropv6.txt" | |
1311 | ] | |
1312 | ||
1313 | asn_urls = [ | |
1314 | "https://www.spamhaus.org/drop/asndrop.txt" | |
1315 | ] | |
1316 | ||
1317 | for url in ip_urls: | |
1318 | try: | |
1319 | with downloader.request(url, return_blocks=False) as f: | |
1320 | fcontent = f.body.readlines() | |
1321 | except Exception as e: | |
1322 | log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e)) | |
1323 | return | |
1324 | ||
1325 | # Iterate through every line, filter comments and add remaining networks to | |
1326 | # the override table in case they are valid... | |
1327 | with self.db.transaction(): | |
1328 | for sline in fcontent: | |
1329 | ||
1330 | # The response is assumed to be encoded in UTF-8... | |
1331 | sline = sline.decode("utf-8") | |
1332 | ||
1333 | # Comments start with a semicolon... | |
1334 | if sline.startswith(";"): | |
1335 | continue | |
1336 | ||
1337 | # Extract network and ignore anything afterwards... | |
1338 | try: | |
1339 | network = ipaddress.ip_network(sline.split()[0], strict=False) | |
1340 | except ValueError: | |
1341 | log.error("Unable to parse line: %s" % sline) | |
1342 | continue | |
1343 | ||
1344 | # Sanitize parsed networks... | |
1345 | if not self._check_parsed_network(network): | |
1346 | log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \ | |
1347 | (url, network)) | |
1348 | continue | |
1349 | ||
1350 | # Conduct SQL statement... | |
1351 | self.db.execute(""" | |
1352 | INSERT INTO network_overrides( | |
1353 | network, | |
1354 | source, | |
1355 | is_drop | |
1356 | ) VALUES (%s, %s, %s) | |
148f68bf | 1357 | ON CONFLICT (network) DO UPDATE SET is_drop = True""", |
69b3d894 PM |
1358 | "%s" % network, |
1359 | "Spamhaus DROP lists", | |
1360 | True | |
1361 | ) | |
1362 | ||
1363 | for url in asn_urls: | |
1364 | try: | |
1365 | with downloader.request(url, return_blocks=False) as f: | |
1366 | fcontent = f.body.readlines() | |
1367 | except Exception as e: | |
1368 | log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e)) | |
1369 | return | |
1370 | ||
1371 | # Iterate through every line, filter comments and add remaining ASNs to | |
1372 | # the override table in case they are valid... | |
1373 | with self.db.transaction(): | |
1374 | for sline in fcontent: | |
1375 | ||
1376 | # The response is assumed to be encoded in UTF-8... | |
1377 | sline = sline.decode("utf-8") | |
1378 | ||
1379 | # Comments start with a semicolon... | |
1380 | if sline.startswith(";"): | |
1381 | continue | |
1382 | ||
1383 | # Throw away anything after the first space... | |
1384 | sline = sline.split()[0] | |
1385 | ||
1386 | # ... strip the "AS" prefix from it ... | |
1387 | sline = sline.strip("AS") | |
1388 | ||
1389 | # ... and convert it into an integer. Voila. | |
1390 | asn = int(sline) | |
1391 | ||
1392 | # Filter invalid ASNs... | |
1393 | if not self._check_parsed_asn(asn): | |
1394 | log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \ | |
1395 | (url, asn)) | |
1396 | continue | |
1397 | ||
1398 | # Conduct SQL statement... | |
1399 | self.db.execute(""" | |
1400 | INSERT INTO autnum_overrides( | |
1401 | number, | |
1402 | source, | |
1403 | is_drop | |
1404 | ) VALUES (%s, %s, %s) | |
148f68bf | 1405 | ON CONFLICT (number) DO UPDATE SET is_drop = True""", |
69b3d894 PM |
1406 | "%s" % asn, |
1407 | "Spamhaus ASN-DROP list", | |
1408 | True | |
1409 | ) | |
1410 | ||
28d29b7c MT |
1411 | @staticmethod |
1412 | def _parse_bool(block, key): | |
1413 | val = block.get(key) | |
1414 | ||
1415 | # There is no point to proceed when we got None | |
1416 | if val is None: | |
1417 | return | |
1418 | ||
1419 | # Convert to lowercase | |
1420 | val = val.lower() | |
1421 | ||
1422 | # True | |
1423 | if val in ("yes", "1"): | |
1424 | return True | |
1425 | ||
1426 | # False | |
1427 | if val in ("no", "0"): | |
1428 | return False | |
1429 | ||
1430 | # Default to None | |
1431 | return None | |
1432 | ||
8084b33a MT |
1433 | def handle_import_countries(self, ns): |
1434 | with self.db.transaction(): | |
1435 | # Drop all data that we have | |
1436 | self.db.execute("TRUNCATE TABLE countries") | |
1437 | ||
1438 | for file in ns.file: | |
1439 | for line in file: | |
1440 | line = line.rstrip() | |
1441 | ||
1442 | # Ignore any comments | |
1443 | if line.startswith("#"): | |
1444 | continue | |
1445 | ||
1446 | try: | |
1447 | country_code, continent_code, name = line.split(maxsplit=2) | |
1448 | except: | |
1449 | log.warning("Could not parse line: %s" % line) | |
1450 | continue | |
1451 | ||
1452 | self.db.execute("INSERT INTO countries(country_code, name, continent_code) \ | |
1453 | VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code) | |
1454 | ||
6ffd06b5 MT |
1455 | |
1456 | def split_line(line): | |
1457 | key, colon, val = line.partition(":") | |
1458 | ||
1459 | # Strip any excess space | |
1460 | key = key.strip() | |
1461 | val = val.strip() | |
78ff0cf2 | 1462 | |
6ffd06b5 | 1463 | return key, val |
78ff0cf2 MT |
1464 | |
1465 | def main(): | |
1466 | # Run the command line interface | |
1467 | c = CLI() | |
1468 | c.run() | |
1469 | ||
1470 | main() |