#!/usr/bin/python3
###############################################################################
#                                                                             #
# libloc - A library to determine the location of someone on the Internet     #
#                                                                             #
# Copyright (C) 2020 IPFire Development Team <info@ipfire.org>                #
#                                                                             #
# This library is free software; you can redistribute it and/or               #
# modify it under the terms of the GNU Lesser General Public                  #
# License as published by the Free Software Foundation; either                #
# version 2.1 of the License, or (at your option) any later version.          #
#                                                                             #
# This library is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU            #
# Lesser General Public License for more details.                             #
#                                                                             #
###############################################################################

import argparse
import ipaddress
import logging
import math
import re
import sys

# Load our location module
import location
import location.database
import location.importer
from location.i18n import _

# Initialise logging
# Module-wide logger; records are passed on to the handlers of ancestor loggers.
log = logging.getLogger("location.importer")
log.propagate = True

# Addresses/networks that are treated as invalid.
# NOTE(review): nothing in the visible part of this file reads this constant,
# and "0.0.0.0" (unlike the IPv6 entries) carries no prefix length - confirm
# the intended use before relying on it.
INVALID_ADDRESSES = (
    "0.0.0.0",
    "::/0",
    "0::/0",
)

class CLI(object):
    """
    Command line interface of the location importer.

    Parses the command line, connects to the database, makes sure the
    ``autnums`` and ``networks`` tables exist and then runs the selected
    sub-command (currently only ``update-whois``).
    """

    def parse_cli(self):
        """
        Parse the command line and return the resulting namespace.

        Enables debug logging when ``--debug`` was passed. If no sub-command
        was selected, the usage is printed and the process exits with code 2.
        """
        parser = argparse.ArgumentParser(
            description=_("Location Importer Command Line Interface"),
        )
        subparsers = parser.add_subparsers()

        # Global configuration flags
        parser.add_argument("--debug", action="store_true",
            help=_("Enable debug output"))

        # version
        parser.add_argument("--version", action="version",
            version="%(prog)s @VERSION@")

        # Database
        parser.add_argument("--database-host", required=True,
            help=_("Database Hostname"), metavar=_("HOST"))
        parser.add_argument("--database-name", required=True,
            help=_("Database Name"), metavar=_("NAME"))
        parser.add_argument("--database-username", required=True,
            help=_("Database Username"), metavar=_("USERNAME"))
        parser.add_argument("--database-password", required=True,
            help=_("Database Password"), metavar=_("PASSWORD"))

        # Update WHOIS
        update_whois = subparsers.add_parser("update-whois",
            help=_("Update WHOIS Information"))
        update_whois.set_defaults(func=self.handle_update_whois)

        args = parser.parse_args()

        # Enable debug logging
        if args.debug:
            log.setLevel(logging.DEBUG)

        # Print usage if no action was given
        # (sub-commands are optional, so "func" is only set when one was chosen)
        if "func" not in args:
            parser.print_usage()
            sys.exit(2)

        return args

    def run(self):
        """
        Entry point: parse arguments, set up the database and run the
        selected sub-command.

        Always terminates the process through ``sys.exit()``, using the
        handler's return value as exit code if it is truthy and 0 otherwise.
        """
        # Parse command line arguments
        args = self.parse_cli()

        # Initialise database
        self.db = self._setup_database(args)

        # Call function
        ret = args.func(args)

        # Return with exit code
        if ret:
            sys.exit(ret)

        # Otherwise just exit
        sys.exit(0)

    def _setup_database(self, ns):
        """
        Initialise the database

        Connects using the ``database_*`` attributes of the namespace *ns*,
        creates the ``autnums`` and ``networks`` tables and their unique
        indexes if they do not exist yet, and returns the connection.
        """
        # Connect to database
        db = location.database.Connection(
            host=ns.database_host, database=ns.database_name,
            user=ns.database_username, password=ns.database_password,
        )

        # NOTE(review): PostgreSQL "integer" is a signed 32 bit type; 4-byte AS
        # numbers above 2147483647 would not fit - confirm whether the number
        # columns should be bigint.
        with db.transaction():
            db.execute("""
                -- autnums
                CREATE TABLE IF NOT EXISTS autnums(number integer, name text);
                CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

                -- networks
                CREATE TABLE IF NOT EXISTS networks(network inet, autnum integer, country text);
                CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
            """)

        return db

    def handle_update_whois(self, ns):
        """
        Handler of the ``update-whois`` sub-command.

        First imports all WHOIS sources in one transaction (AS numbers joined
        with their organisation names go into ``autnums``), then imports every
        extended source in a transaction of its own (networks joined with
        the AS numbers of the same organisation go into ``networks``).

        *ns* is the parsed argument namespace; it is not read here.
        """
        downloader = location.importer.Downloader()

        # Download all sources
        with self.db.transaction():
            # Create some temporary tables to store parsed data
            # (they are dropped automatically when the transaction commits)
            self.db.execute("""
                CREATE TEMPORARY TABLE _autnums(number integer, organization text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

                CREATE TEMPORARY TABLE _organizations(handle text, name text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
            """)

            for source in location.importer.WHOIS_SOURCES:
                with downloader.request(source, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block)

            # Resolve organisation handles into names and store the result
            self.db.execute("""
                INSERT INTO autnums(number, name)
                    SELECT _autnums.number, _organizations.name FROM _autnums
                        LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
                ON CONFLICT (number) DO UPDATE SET name = excluded.name;
            """)

        # Download all extended sources
        for source in location.importer.EXTENDED_SOURCES:
            with self.db.transaction():
                # Create some temporary tables to store parsed data
                self.db.execute("""
                    CREATE TEMPORARY TABLE _autnums(number integer, organization text)
                        ON COMMIT DROP;
                    CREATE INDEX _autnums_organization ON _autnums(organization);

                    CREATE TEMPORARY TABLE _inetnums(network inet, country text, organization text)
                        ON COMMIT DROP;
                    CREATE INDEX _inetnums_organization ON _inetnums(organization);
                """)

                # Download data
                with downloader.request(source) as f:
                    for line in f:
                        self._parse_line(line)

                # Store information in networks table
                # (networks that are already known are left untouched)
                self.db.execute("""
                    INSERT INTO networks(network, autnum, country)
                        SELECT _inetnums.network, _autnums.number, _inetnums.country FROM _inetnums
                            LEFT JOIN _autnums ON _inetnums.organization = _autnums.organization
                        ORDER BY _autnums.number
                    ON CONFLICT (network) DO NOTHING;
                """)

    def _parse_block(self, block):
        """
        Dispatch a WHOIS block (a sequence of lines) to the matching parser.

        Only ``aut-num`` and ``organisation`` blocks are handled; empty blocks
        and all other block types are ignored.
        """
        # Nothing to do for empty blocks
        if not block:
            return

        # Get first line to find out what type of block this is
        line = block[0]

        # aut-num
        if line.startswith("aut-num:"):
            return self._parse_autnum_block(block)

        # organisation
        elif line.startswith("organisation:"):
            return self._parse_org_block(block)

    def _parse_autnum_block(self, block):
        """
        Parse an ``aut-num`` block and store the AS number together with its
        organisation handle in the temporary ``_autnums`` table.

        Blocks without a parsable AS number are skipped.
        """
        autnum = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key == "aut-num":
                m = re.match(r"^(AS|as)(\d+)", val)
                if m:
                    autnum["asn"] = m.group(2)

            elif key == "org":
                autnum[key] = val

        # Skip empty objects and objects without a usable AS number, which
        # would otherwise be stored as a row with number NULL
        if "asn" not in autnum:
            return

        # Insert into database
        self.db.execute("INSERT INTO _autnums(number, organization) \
            VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET \
            organization = excluded.organization",
            autnum.get("asn"), autnum.get("org"),
        )

    def _parse_org_block(self, block):
        """
        Parse an ``organisation`` block and store the handle together with
        the organisation name in the temporary ``_organizations`` table.
        """
        org = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key in ("organisation", "org-name"):
                org[key] = val

        # Skip empty objects
        if not org:
            return

        self.db.execute("INSERT INTO _organizations(handle, name) \
            VALUES(%s, %s) ON CONFLICT (handle) DO \
            UPDATE SET name = excluded.name",
            org.get("organisation"), org.get("org-name"),
        )

    def _parse_line(self, line):
        """
        Parse one "|"-separated line of an extended source.

        The first three fields are registry, country code and record type;
        the remainder is handed to the parser for that record type.
        Version lines, comments and statistics lines are skipped.
        """
        # Skip version line
        if line.startswith("2"):
            return

        # Skip comments
        if line.startswith("#"):
            return

        try:
            registry, country_code, record_type, rest = line.split("|", 3)
        except ValueError:
            log.warning("Could not parse line: %s", line)
            return

        # Skip any lines that are for stats only
        if country_code == "*":
            return

        if record_type in ("ipv6", "ipv4"):
            return self._parse_ip_line(country_code, record_type, rest)

        elif record_type == "asn":
            return self._parse_asn_line(country_code, rest)

        else:
            log.warning("Unknown line type: %s", record_type)
            return

    def _parse_ip_line(self, country, type, line):
        """
        Parse the remainder of an ``ipv4``/``ipv6`` line and store the
        network in the temporary ``_inetnums`` table.

        *line* holds ``address|value|date|status`` with an optional trailing
        organization field. For ``ipv6`` the value is used as prefix length;
        for ``ipv4`` it is treated as a number of addresses and converted
        into a prefix length. Only "assigned" and "allocated" records are
        stored; malformed records are logged and skipped.
        """
        fields = line.split("|")

        # The trailing organization field is optional
        if len(fields) == 5:
            address, prefix, date, status, organization = fields
        elif len(fields) == 4:
            address, prefix, date, status = fields
            organization = None
        else:
            log.warning("Unhandled line format: %s", line)
            return

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Cast prefix into an integer
        try:
            prefix = int(prefix)
        except ValueError:
            log.warning("Invalid prefix: %s", prefix)
            return

        # Fix prefix length for IPv4
        if type == "ipv4":
            # A block cannot contain less than one address
            if prefix <= 0:
                log.warning("Invalid prefix: %s", prefix)
                return

            # bit_length() - 1 is floor(log2(n)), computed without any
            # floating point rounding. Counts that are not a power of two
            # are rounded down to the next smaller network, as before.
            prefix = 32 - (prefix.bit_length() - 1)

        # Try to parse the address
        try:
            network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
        except ValueError:
            log.warning("Invalid IP address: %s", address)
            return

        self.db.execute("INSERT INTO _inetnums(network, country, organization) \
            VALUES(%s, %s, %s)", "%s" % network, country, organization,
        )

    def _parse_asn_line(self, country, line):
        """
        Parse the remainder of an ``asn`` line and store the AS number in
        the temporary ``_autnums`` table.

        *line* holds ``asn|value|date|status`` with an optional trailing
        organization id. Only "assigned" and "allocated" records are stored.
        *country* is accepted but not stored.
        """
        fields = line.split("|")

        # NOTE(review): the second field is ignored. In the RIR statistics
        # exchange format it presumably is the number of consecutive AS
        # numbers delegated from the start value - confirm whether ranges
        # need to be expanded.
        if len(fields) == 5:
            asn, _value, date, status, org_id = fields
        elif len(fields) == 4:
            asn, _value, date, status = fields
            org_id = None
        else:
            log.warning("Could not parse line: %s", line)
            return

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        self.db.execute("INSERT INTO _autnums(number, organization) \
            VALUES(%s, %s)", asn, org_id)


def split_line(line):
    """
    Split a "key: value" line at the first colon.

    Returns a ``(key, value)`` tuple with surrounding whitespace removed
    from both parts. If the line contains no colon, the whole (stripped)
    line is returned as key and the value is an empty string.
    """
    key, _sep, val = line.partition(":")

    # Strip any excess space
    return key.strip(), val.strip()


def main():
    """
    Create the command line interface and run it.

    CLI.run() terminates the process through sys.exit().
    """
    # Run the command line interface
    c = CLI()
    c.run()


# Only start the importer when executed as a script
if __name__ == "__main__":
    main()