]> git.ipfire.org Git - location/location-database.git/blob - tools/base.py
Skip any mntner: blocks
[location/location-database.git] / tools / base.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
7 # #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
12 # #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
17 # #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
20 # #
21 ###############################################################################
22
23 import datetime
24 import ipaddress
25 import logging
26 import math
27 import os.path
28 import re
29 import sqlite3
30
31 from . import downloader
32 from . import util
33
# Names of the exported files; RIR.make_path() joins them onto the
# directory that is named after the RIR class
FILENAME_ASNUMS = "asnums.txt"
FILENAME_NETWORKS = "networks.txt"

# Historical misspelling, kept as an alias so that existing references
# to the old name keep working
FILENMAE_NETWORKS = FILENAME_NETWORKS

# Output line format: the key is left-aligned and padded to 16 columns,
# followed by the value and a newline
FMT = "%-16s%s\n"

# Matches an AS number like "AS64496" at the start of a string;
# group 2 holds the digits
RE_AS = re.compile(r"^(AS|as)(\d+)")

# inetnum/inet6num blocks that start at one of these values are skipped
# by the parser
INVALID_ADDRESSES = (
    "0.0.0.0",
    "::/0",
    "0::/0",
)
46
class RIR(object):
    """
    Base class that describes one Regional Internet Registry.

    The class name is used as the directory that holds the exported
    files, and (together with ``name``, if set) as the string
    representation of the registry.
    """

    # Optional human-readable name that is appended in __str__()
    name = None

    # URLs of the database dumps which the parser fetches; empty here,
    # and RIRParser.fetch_data() refuses to run while it is empty
    database_urls = []

    def __init__(self):
        pass

    def __str__(self):
        class_name = self.__class__.__name__

        # Without a name, the class name alone identifies the registry
        if not self.name:
            return class_name

        return "%s - %s" % (class_name, self.name)

    @property
    def parser(self):
        """The parser class that is instantiated by update()."""
        return RIRParser

    def make_path(self, path):
        """Return ``path`` placed inside the directory named after this class."""
        directory = self.__class__.__name__

        return os.path.join(directory, path)

    @property
    def filename_asnums(self):
        """Relative path of the file that lists all AS numbers."""
        return self.make_path(FILENAME_ASNUMS)

    @property
    def filename_networks(self):
        """Relative path of the file that lists all networks."""
        return self.make_path(FILENMAE_NETWORKS)

    def update(self, directory):
        """
        Fetch all data of this registry and export it.

        ``directory`` is handed on to the parser's export_database().
        """
        parser = self.parser(self)

        # First pull everything into the parser's temporary storage ...
        parser.fetch_data()

        # ... then write the result out
        parser.export_database(directory)
83
84
class RIRParser(object):
    """
    Collects the data of one RIR and exports it as text files.

    Blocks fetched through the downloader are sorted by type, the
    interesting attributes are stored in a temporary SQLite database
    and finally written out by export_database().
    """

    def __init__(self, rir):
        self.rir = rir

        # Create a downloader to fetch data
        self.downloader = downloader.Downloader()

        # Create a database to hold temporary data
        self.db = self._make_database(":memory:")

        # Start time (written into the header of every exported file)
        self.start_time = datetime.datetime.utcnow()

    def _make_database(self, filename):
        """Open the SQLite database at ``filename`` and create the schema."""
        db = sqlite3.connect(filename)

        # Create database layout; the connection is used as a context
        # manager so that the changes are committed on success
        with db:
            db.executescript("""
                CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, name TEXT, org TEXT);

                CREATE TABLE IF NOT EXISTS inetnums(network TEXT, netname TEXT, country TEXT, description TEXT);

                CREATE TABLE IF NOT EXISTS organisations(handle TEXT, name TEXT, country TEXT);
                CREATE INDEX IF NOT EXISTS organisations_handle ON organisations(handle);

                CREATE TABLE IF NOT EXISTS routes(route TEXT, asn INTEGER);
                CREATE INDEX IF NOT EXISTS routes_route ON routes(route);
            """)

        return db

    def export_database(self, directory):
        """
        Write the collected AS numbers and networks to their files.

        NOTE(review): ``directory`` is currently not used. Both files are
        opened at the relative paths returned by the RIR object, i.e.
        relative to the current working directory - confirm with the
        caller whether they should be placed below ``directory`` instead.
        """
        # Write all ASes
        with open(self.rir.filename_asnums, "w") as f:
            self._export_asnums(f)

        # Write all networks
        with open(self.rir.filename_networks, "w") as f:
            self._export_networks(f)

    def _export_asnums(self, f):
        """Write one block per known AS number to the file object ``f``."""
        # Write header
        self._write_header(f)

        with self.db as c:
            res = c.execute("""SELECT DISTINCT autnums.asn, autnums.name,
                organisations.name, organisations.country FROM autnums
                LEFT JOIN organisations ON autnums.org = organisations.handle
                WHERE autnums.asn IS NOT NULL ORDER BY autnums.asn""")

            for asn, name, org_name, org_country in res:
                f.write(FMT % ("asnum:", "AS%s" % asn))

                if name:
                    f.write(FMT % ("name:", name))

                if org_name:
                    f.write(FMT % ("org:", org_name))

                if org_country:
                    f.write(FMT % ("country:", org_country))

                # End block
                f.write("\n")

    def _export_networks(self, f):
        """Write one block per known network to the file object ``f``."""
        # Write header
        self._write_header(f)

        with self.db as c:
            # Write all networks, with the AS of a route for exactly the
            # same network if there is one
            res = c.execute("""SELECT inetnums.network, routes.asn,
                inetnums.country, inetnums.netname, inetnums.description
                FROM inetnums LEFT JOIN routes ON inetnums.network = routes.route
                ORDER BY routes.asn, inetnums.network""")

            for net, asn, country, name, description in res:
                f.write(FMT % ("net:", net))

                if name:
                    f.write(FMT % ("name:", name))

                if asn:
                    f.write(FMT % ("asnum:", "AS%s" % asn))

                if country:
                    f.write(FMT % ("country:", country))

                # Multi-line descriptions become one "descr:" line each
                if description:
                    for line in description.splitlines():
                        f.write(FMT % ("descr:", line))

                # End the block
                f.write("\n")

    def _write_header(self, f):
        """Write the comment header that starts every exported file."""
        f.write("#\n")
        f.write("# %s\n" % self.rir)
        f.write("# Generated at %s\n" % self.start_time)
        f.write("#\n\n")

    def fetch_data(self):
        """
        Download and parse every database URL of the RIR.

        Raises NotImplementedError if the RIR has no database URLs.
        """
        if not self.rir.database_urls:
            raise NotImplementedError("Database URLs not set")

        # Parse entire database in one go
        for url in self.rir.database_urls:
            self.parse_url(url)

    def parse_url(self, url):
        """Request ``url`` and parse every block of the response."""
        with self.downloader.request(url) as r:
            for block in r:
                self.parse_block(block)

    def parse_block(self, block):
        """
        Hand ``block`` (a sequence of lines) to the matching parser.

        The type is taken from the first line. person, domain and mntner
        blocks are ignored on purpose, anything else that is not known
        is logged.
        """
        # There is nothing to find out about an empty block
        if not block:
            return

        # Get first line to find out what type of block this is
        line = block[0]

        # inetnum
        if line.startswith(("inet6num:", "inetnum:")):
            return self._parse_inetnum_block(block)

        # route
        elif line.startswith(("route6:", "route:")):
            return self._parse_route_block(block)

        # aut-num
        elif line.startswith("aut-num:"):
            return self._parse_autnum_block(block)

        # organisation
        elif line.startswith("organisation:"):
            return self._parse_org_block(block)

        # person, domain, mntner (ignored)
        elif line.startswith(("person:", "domain:", "mntner:")):
            return

        # Log any unknown blocks
        logging.warning("Unknown block:")
        for line in block:
            logging.warning(line)

    @staticmethod
    def _range_to_cidr(start_address, end_address):
        """
        Convert an inclusive address range to "<start>/<prefix>" notation.

        Both arguments are strings. The prefix length is derived from the
        number of addresses in the range and rounded to the nearest
        integer, so ranges whose size is not a power of two are only
        approximated.

        Raises ValueError if an address cannot be parsed, if the two
        addresses are of different IP versions, or if the range ends
        before it starts.
        """
        start = ipaddress.ip_address(start_address)
        end = ipaddress.ip_address(end_address)

        if start.version != end.version:
            raise ValueError("Mixed IP versions: %s - %s" % (start, end))

        # The range includes both ends, so a single address counts as one
        num_addresses = int(end) - int(start) + 1
        if num_addresses < 1:
            raise ValueError("Range ends before it starts: %s - %s" % (start, end))

        # 32 for IPv4, reduced by the number of bits the range spans
        prefix = start.max_prefixlen - math.log2(num_addresses)

        return "%s/%.0f" % (start, prefix)

    def _parse_inetnum_block(self, block):
        """
        Store network, netname, country and description of an inetnum or
        inet6num block. Blocks that start at an invalid address or that
        carry an unparsable range are skipped entirely.
        """
        logging.debug("Parsing inetnum block:")

        inetnum = {}
        for line in block:
            logging.debug(line)

            # Split line
            key, val = util.split_line(line)

            if key == "inetnum":
                start_address, delim, end_address = val.partition("-")

                # Strip any excess space
                start_address, end_address = start_address.rstrip(), end_address.strip()

                # Skip invalid blocks
                if start_address in INVALID_ADDRESSES:
                    return

                # Convert the range to a network in CIDR notation
                try:
                    inetnum["inetnum"] = self._range_to_cidr(start_address, end_address)
                except ValueError:
                    logging.warning("Could not parse line: %s", line)
                    return

            elif key == "inet6num":
                # Skip invalid blocks
                if val in INVALID_ADDRESSES:
                    return

                inetnum[key] = val

            elif key == "netname":
                inetnum[key] = val

            elif key == "country":
                if val == "UNITED STATES":
                    val = "US"

                inetnum[key] = val.upper()

            elif key == "descr":
                # Collect multiple descr lines, separated by newlines
                if key in inetnum:
                    inetnum[key] += "\n%s" % val
                else:
                    inetnum[key] = val

        # Skip empty objects
        if not inetnum:
            return

        with self.db as c:
            args = (
                inetnum.get("inet6num") or inetnum.get("inetnum"),
                inetnum.get("netname"),
                inetnum.get("country"),
                inetnum.get("descr"),
            )

            c.execute(
                "INSERT INTO inetnums(network, netname, country, description) "
                "VALUES(?, ?, ?, ?)", args)

    def _parse_route_block(self, block):
        """Store the route and the originating AS of a route/route6 block."""
        logging.debug("Parsing route block:")

        route = {}
        for line in block:
            logging.debug(line)

            # Split line
            key, val = util.split_line(line)

            # Keep any significant data
            if key in ("route6", "route"):
                route[key] = val

            elif key == "origin":
                m = RE_AS.match(val)
                if m:
                    route["asn"] = m.group(2)

        # Skip empty objects
        if not route:
            return

        with self.db as c:
            args = (
                route.get("route6") or route.get("route"),
                route.get("asn"),
            )

            c.execute("INSERT INTO routes(route, asn) VALUES(?, ?)", args)

    def _parse_autnum_block(self, block):
        """Store AS number, name and organisation handle of an aut-num block."""
        logging.debug("Parsing autnum block:")

        autnum = {}
        for line in block:
            logging.debug(line)

            # Split line
            key, val = util.split_line(line)

            if key == "aut-num":
                m = RE_AS.match(val)
                if m:
                    autnum["asn"] = m.group(2)

            elif key in ("as-name", "org"):
                autnum[key] = val

        # Skip empty objects
        if not autnum:
            return

        with self.db as c:
            args = (
                autnum.get("asn"),
                autnum.get("as-name"),
                autnum.get("org"),
            )

            c.execute("INSERT INTO autnums(asn, name, org) VALUES(?, ?, ?)", args)

    def _parse_org_block(self, block):
        """Store handle, name and country of an organisation block."""
        logging.debug("Parsing org block:")

        org = {}
        for line in block:
            logging.debug(line)

            # Split line
            key, val = util.split_line(line)

            if key in ("organisation", "org-name", "country"):
                org[key] = val

        # Skip empty objects
        if not org:
            return

        with self.db as c:
            args = (
                org.get("organisation"),
                org.get("org-name"),
                org.get("country"),
            )

            c.execute(
                "INSERT INTO organisations(handle, name, country) "
                "VALUES(?, ?, ?)", args)