]> git.ipfire.org Git - location/location-database.git/blob - tools/base.py
Revert "Replace sqlite with PostgreSQL"
[location/location-database.git] / tools / base.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
7 # #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
12 # #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
17 # #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
20 # #
21 ###############################################################################
22
23 import datetime
24 import ipaddress
25 import logging
26 import math
27 import os.path
28 import re
29 import sqlite3
30
31 from . import downloader
32 from . import util
33
34 FILENAME_ASNUMS = "asnums.txt"
35 FILENMAE_NETWORKS = "networks.txt"
36
37 FMT = "%-16s%s\n"
38
39 RE_AS = re.compile(r"^(AS|as)(\d+)")
40
41 INVALID_ADDRESSES = (
42 "0.0.0.0",
43 "::/0",
44 "0::/0",
45 )
46
47 class RIR(object):
48 name = None
49 database_urls = []
50
51 def __init__(self):
52 pass
53
54 def __str__(self):
55 if self.name:
56 return "%s - %s" % (self.__class__.__name__, self.name)
57
58 return self.__class__.__name__
59
60 @property
61 def parser(self):
62 return RIRParser
63
64 def make_path(self, path):
65 return os.path.join(self.__class__.__name__, path)
66
67 @property
68 def filename_asnums(self):
69 return self.make_path(FILENAME_ASNUMS)
70
71 @property
72 def filename_networks(self):
73 return self.make_path(FILENMAE_NETWORKS)
74
75 def update(self, directory):
76 p = self.parser(self)
77
78 # Download all data and store it in memory
79 p.fetch_data()
80
81 # Write the database to disk
82 p.export_database(directory)
83
84
85 class RIRParser(object):
86 def __init__(self, rir):
87 self.rir = rir
88
89 # Create a downloader to fetch data
90 self.downloader = downloader.Downloader()
91
92 # Create a database to hold temporary data
93 self.db = self._make_database(":memory:")
94
95 # Start time
96 self.start_time = datetime.datetime.utcnow()
97
98 def _make_database(self, filename):
99 db = sqlite3.connect(filename)
100
101 # Create database layout
102 with db as cursor:
103 cursor.executescript("""
104 CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, name TEXT, org TEXT);
105
106 CREATE TABLE IF NOT EXISTS inetnums(network TEXT, netname TEXT, country TEXT, description TEXT);
107
108 CREATE TABLE IF NOT EXISTS organisations(handle TEXT, name TEXT, country TEXT);
109 CREATE INDEX IF NOT EXISTS organisations_handle ON organisations(handle);
110
111 CREATE TABLE IF NOT EXISTS routes(route TEXT, asn INTEGER);
112 CREATE INDEX IF NOT EXISTS routes_route ON routes(route);
113 """)
114
115 return db
116
117 def export_database(self, directory):
118 # Write all ASes
119 with open(self.rir.filename_asnums, "w") as f:
120 self._export_asnums(f)
121
122 # Write all networks
123 with open(self.rir.filename_networks, "w") as f:
124 self._export_networks(f)
125
126 def _export_asnums(self, f):
127 # Write header
128 self._write_header(f)
129
130 with self.db as c:
131 res = c.execute("""SELECT DISTINCT autnums.asn, autnums.name,
132 organisations.name, organisations.country FROM autnums
133 LEFT JOIN organisations ON autnums.org = organisations.handle
134 WHERE autnums.asn IS NOT NULL ORDER BY autnums.asn""")
135
136 for row in res:
137 f.write(FMT % ("asnum:", "AS%s" % row[0]))
138
139 if row[1]:
140 f.write(FMT % ("name:", row[1]))
141
142 if row[2]:
143 f.write(FMT % ("org:", row[2]))
144
145 if row[3]:
146 f.write(FMT % ("country:", row[3]))
147
148 # End block
149 f.write("\n")
150
151 def _export_networks(self, f):
152 # Write header
153 self._write_header(f)
154
155 with self.db as c:
156 # Write all networks
157 res = c.execute("""SELECT inetnums.network, routes.asn,
158 inetnums.country, inetnums.netname, inetnums.description
159 FROM inetnums LEFT JOIN routes ON inetnums.network = routes.route
160 ORDER BY routes.asn, inetnums.network""")
161
162 for row in res:
163 net, asn, country, name, description = row
164
165 f.write(FMT % ("net:", net))
166
167 if name:
168 f.write(FMT % ("name:", name))
169
170 if asn:
171 f.write(FMT % ("asnum:", "AS%s" % asn))
172
173 if country:
174 f.write(FMT % ("country:", country))
175
176 if description:
177 for line in description.splitlines():
178 f.write(FMT % ("descr:", line))
179
180 # End the block
181 f.write("\n")
182
183 def _write_header(self, f):
184 f.write("#\n")
185 f.write("# %s\n" % self.rir)
186 f.write("# Generated at %s\n" % self.start_time)
187 f.write("#\n\n")
188
189 def fetch_data(self):
190 if not self.rir.database_urls:
191 raise NotImplementedError("Database URLs not set")
192
193 # Parse entire database in one go
194 for url in self.rir.database_urls:
195 self.parse_url(url)
196
197 self.db.commit()
198
199 def parse_url(self, url):
200 with self.downloader.request(url) as r:
201 for block in r:
202 self.parse_block(block)
203
204 def parse_block(self, block):
205 # Get first line to find out what type of block this is
206 line = block[0]
207
208 # inetnum
209 if line.startswith("inet6num:") or line.startswith("inetnum:"):
210 return self._parse_inetnum_block(block)
211
212 # route
213 elif line.startswith("route6:") or line.startswith("route:"):
214 return self._parse_route_block(block)
215
216 # aut-num
217 elif line.startswith("aut-num:"):
218 return self._parse_autnum_block(block)
219
220 # organisation
221 elif line.startswith("organisation:"):
222 return self._parse_org_block(block)
223
224 # person (ignored)
225 elif line.startswith("person:"):
226 return
227
228 # domain (ignored)
229 elif line.startswith("domain:"):
230 return
231
232 # mntner (ignored)
233 elif line.startswith("mntner:"):
234 return
235
236 # as-block (ignored)
237 elif line.startswith("as-block:"):
238 return
239
240 # as-set (ignored)
241 elif line.startswith("as-set:"):
242 return
243
244 # route-set (ignored)
245 elif line.startswith("route-set:"):
246 return
247
248 # role (ignored)
249 elif line.startswith("role:"):
250 return
251
252 # key-cert (ignored)
253 elif line.startswith("key-cert:"):
254 return
255
256 # irt (ignored)
257 elif line.startswith("irt:"):
258 return
259
260 # Log any unknown blocks
261 else:
262 logging.warning("Unknown block:")
263 for line in block:
264 logging.warning(line)
265
266 def _parse_inetnum_block(self, block):
267 logging.debug("Parsing inetnum block:")
268
269 inetnum = {}
270 for line in block:
271 logging.debug(line)
272
273 # Split line
274 key, val = util.split_line(line)
275
276 if key == "inetnum":
277 start_address, delim, end_address = val.partition("-")
278
279 # Strip any excess space
280 start_address, end_address = start_address.rstrip(), end_address.strip()
281
282 # Skip invalid blocks
283 if start_address in INVALID_ADDRESSES:
284 return
285
286 # Convert to IP address
287 try:
288 start_address = ipaddress.ip_address(start_address)
289 end_address = ipaddress.ip_address(end_address)
290 except ValueError:
291 logging.warning("Could not parse line: %s" % line)
292 return
293
294 # Set prefix to default
295 prefix = 32
296
297 # Count number of addresses in this subnet
298 num_addresses = int(end_address) - int(start_address)
299 if num_addresses:
300 prefix -= math.log(num_addresses, 2)
301
302 inetnum["inetnum"] = "%s/%.0f" % (start_address, prefix)
303
304 elif key == "inet6num":
305 # Skip invalid blocks
306 if val in INVALID_ADDRESSES:
307 return
308
309 inetnum[key] = val
310
311 elif key == "netname":
312 inetnum[key] = val
313
314 elif key == "country":
315 if val == "UNITED STATES":
316 val = "US"
317
318 inetnum[key] = val.upper()
319
320 elif key == "descr":
321 if key in inetnum:
322 inetnum[key] += "\n%s" % val
323 else:
324 inetnum[key] = val
325
326 # Skip empty objects
327 if not inetnum:
328 return
329
330 with self.db as c:
331 args = (
332 inetnum.get("inet6num") or inetnum.get("inetnum"),
333 inetnum.get("netname"),
334 inetnum.get("country"),
335 inetnum.get("descr"),
336 )
337
338 c.execute("INSERT INTO inetnums(network, netname, country, description) \
339 VALUES(?, ?, ?, ?)", args)
340
341 def _parse_route_block(self, block):
342 logging.debug("Parsing route block:")
343
344 route = {}
345 for line in block:
346 logging.debug(line)
347
348 # Split line
349 key, val = util.split_line(line)
350
351 # Keep any significant data
352 if key in ("route6", "route"):
353 route[key] = val
354
355 elif key == "origin":
356 m = RE_AS.match(val)
357 if m:
358 route["asn"] = m.group(2)
359
360 # Skip empty objects
361 if not route:
362 return
363
364 with self.db as c:
365 args = (
366 route.get("route6") or route.get("route"),
367 route.get("asn"),
368 )
369
370 c.execute("INSERT INTO routes(route, asn) \
371 VALUES(?, ?)", args)
372
373 def _parse_autnum_block(self, block):
374 logging.debug("Parsing autnum block:")
375
376 autnum = {}
377 for line in block:
378 logging.debug(line)
379
380 # Split line
381 key, val = util.split_line(line)
382
383 if key == "aut-num":
384 m = RE_AS.match(val)
385 if m:
386 autnum["asn"] = m.group(2)
387
388 elif key in ("as-name", "org"):
389 autnum[key] = val
390
391 # Skip empty objects
392 if not autnum:
393 return
394
395 with self.db as c:
396 args = (
397 autnum.get("asn"),
398 autnum.get("as-name"),
399 autnum.get("org"),
400 )
401
402 c.execute("INSERT INTO autnums(asn, name, org) \
403 VALUES(?, ?, ?)", args)
404
405 def _parse_org_block(self, block):
406 logging.debug("Parsing org block:")
407
408 org = {}
409 for line in block:
410 logging.debug(line)
411
412 # Split line
413 key, val = util.split_line(line)
414
415 if key in ("organisation", "org-name", "country"):
416 org[key] = val
417
418 # Skip empty objects
419 if not org:
420 return
421
422 with self.db as c:
423 args = (
424 org.get("organisation"),
425 org.get("org-name"),
426 org.get("country"),
427 )
428
429 c.execute("INSERT INTO organisations(handle, name, country) \
430 VALUES(?, ?, ?)", args)