git.ipfire.org Git - location/location-database.git/blob - tools/base.py
tools: Attempt parsing lines without an organisation id
[location/location-database.git] / tools / base.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
7 # #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
12 # #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
17 # #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
20 # #
21 ###############################################################################
22
23 import datetime
24 import ipaddress
25 import logging
26 import math
27 import os.path
28 import re
29 import sqlite3
30 import struct
31
32 from . import downloader
33 from . import util
34
# Names of the per-registry output files
FILENAME_ASNUMS = "asnums.txt"
# NOTE(review): "FILENMAE" is a typo for "FILENAME"; the name is kept as-is
# because other code in this module refers to it by this spelling.
FILENMAE_NETWORKS = "networks.txt"

# One "key: value" output line; the key is left-aligned in a 16 character column
FMT = "%-16s%s\n"

# Matches a leading AS number such as "AS64496" or "as64496"
RE_AS = re.compile(r"^(AS|as)(\d+)")

# Unspecified/default-route notations
INVALID_ADDRESSES = ("0.0.0.0", "::/0", "0::/0")
47
class RIR(object):
    """Base class describing one registry (RIR) data source.

    Subclasses are expected to set ``name`` and ``database_urls``. Output
    file paths are placed in a directory named after the concrete class.
    """

    # Optional human-readable name; appended to the class name by __str__
    name = None

    # URLs of the database files that the parser downloads
    database_urls = []

    def __init__(self):
        pass

    def __str__(self):
        label = self.__class__.__name__

        # Without a name, the class name alone identifies the registry
        if not self.name:
            return label

        return "%s - %s" % (label, self.name)

    @property
    def parser(self):
        """The parser class used by update() to process this registry."""
        return RIRParser

    def make_path(self, path):
        """Return *path* prefixed with a directory named after this class."""
        return os.path.join(self.__class__.__name__, path)

    @property
    def filename_asnums(self):
        """Path of the AS number output file for this registry."""
        return self.make_path(FILENAME_ASNUMS)

    @property
    def filename_networks(self):
        """Path of the networks output file for this registry."""
        return self.make_path(FILENMAE_NETWORKS)

    def update(self, directory):
        """Download this registry's data and export it.

        *directory* is handed to the parser's export_database().
        """
        parser = self.parser(self)

        # Download all data and store it in memory
        parser.fetch_data()

        # Write the database to disk
        parser.export_database(directory)
84
85
class RIRParser(object):
    """Download a registry's database files and export them as text files.

    Records are parsed line by line into a temporary in-memory SQLite
    database (tables ``autnums`` and ``inetnums``) and then written out as
    blocks of "key: value" lines.
    """

    def __init__(self, rir):
        """Set up the downloader and the temporary database for *rir*."""
        self.rir = rir

        # Create a downloader to fetch data
        self.downloader = downloader.Downloader()

        # Create a database to hold temporary data
        self.db = self._make_database(":memory:")

        # Start time (written into the header of every exported file)
        self.start_time = datetime.datetime.utcnow()

    def _make_database(self, filename):
        """Open the SQLite database *filename* and create the schema.

        Every statement uses IF NOT EXISTS so that opening an already
        initialised database file does not fail.
        """
        db = sqlite3.connect(filename)

        # Log every executed SQL statement at debug level
        db.set_trace_callback(logging.debug)

        # Create database layout
        with db as connection:
            connection.executescript("""
                CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER, date DATE);
                CREATE INDEX IF NOT EXISTS autnums_org_id ON autnums(org_id);

                CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER,
                    family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER, date DATE);
                CREATE INDEX IF NOT EXISTS inetnums_sort ON inetnums(address_start);
            """)

        return db

    def export_database(self, directory):
        """Write the AS number and network files of the registry.

        NOTE(review): *directory* is currently not used; the files are
        opened at ``rir.filename_asnums`` / ``rir.filename_networks``
        as-is. Confirm with the callers whether the paths should be
        joined onto *directory*.
        """
        # Write all ASes
        with open(self.rir.filename_asnums, "w") as f:
            self._export_asnums(f)

        # Write all networks
        with open(self.rir.filename_networks, "w") as f:
            self._export_networks(f)

    def _write_header(self, f):
        """Write the comment header naming the registry and the start time."""
        f.write("#\n")
        f.write("# %s\n" % self.rir)
        f.write("# Generated at %s\n" % self.start_time)
        f.write("#\n\n")

    def fetch_data(self):
        """Download and parse every URL in ``rir.database_urls``.

        Raises NotImplementedError if the registry has no URLs set.
        """
        if not self.rir.database_urls:
            raise NotImplementedError("Database URLs not set")

        # Parse entire database in one go
        for url in self.rir.database_urls:
            self.parse_url(url)

        self.db.commit()

    def parse_url(self, url):
        """Request *url* and feed every line except the first to parse_line()."""
        with self.downloader.request(url) as r:
            for i, line in enumerate(r):
                # Skip the first line
                if i == 0:
                    continue

                self.parse_line(line)

    def parse_line(self, line):
        """Parse one "registry|country|type|..." line and store the record.

        Comments, lines that cannot be split, statistics lines (country
        code "*") and unknown record types are skipped; problems are
        logged as warnings.
        """
        # Skip comments
        if line.startswith("#"):
            return

        try:
            _registry, country_code, record_type, rest = line.split("|", 3)
        except ValueError:
            logging.warning("Could not parse line: %s", line)
            return

        # Skip any lines that are for stats only
        if country_code == "*":
            return

        if record_type in ("ipv6", "ipv4"):
            return self._parse_ip_line(country_code, record_type, rest)

        if record_type == "asn":
            return self._parse_asn_line(country_code, rest)

        logging.warning("Unknown line type: %s", record_type)

    @staticmethod
    def _split_record(line):
        """Split *line* on "|" into five fields, or return None.

        A line with only four fields has no organisation id; None is
        appended in its place. Any other field count yields None.
        """
        fields = line.split("|")

        if len(fields) == 4:
            fields.append(None)

        if len(fields) != 5:
            return None

        return fields

    def _parse_ip_line(self, country_code, type, line):
        """Parse "address|prefix|date|status[|org_id]" and insert the network.

        Only records with status "assigned" or "allocated" are stored.
        Records with an unusable prefix, date or address are logged and
        skipped.
        """
        fields = self._split_record(line)
        if fields is None:
            logging.warning("Unhandled line format: %s", line)
            return

        address, prefix, date, status, org_id = fields

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Cast prefix into an integer
        try:
            prefix = int(prefix)
        except ValueError:
            logging.warning("Invalid prefix: %s", prefix)
            return

        # Fix prefix length for IPv4: the field is converted as a number of
        # addresses, so turn it into a prefix length using floor(log2(n)).
        if type == "ipv4":
            if prefix <= 0:
                logging.warning("Invalid prefix: %s", prefix)
                return

            # bit_length() - 1 is an exact integer floor(log2(n))
            prefix = 32 - (prefix.bit_length() - 1)

        # Parse date
        try:
            date = datetime.datetime.strptime(date, "%Y%m%d")
        except ValueError:
            logging.warning("Could not parse date: %s", date)
            return

        # Try to parse the address
        try:
            network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
        except ValueError:
            logging.warning("Invalid IP address: %s", address)
            return

        # Get the first and last address of this network
        address_start = int(network.network_address)
        address_end = int(network.broadcast_address)

        args = (
            "%s" % network,
            country_code,
            org_id,
            network.version,
            # Addresses are stored as 16 bytes (two big-endian 64 bit words)
            struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
            struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
            network.prefixlen,
            date,
        )

        with self.db as c:
            c.execute("INSERT INTO inetnums(network, country, org_id, \
                family, address_start, address_end, prefix, date) \
                VALUES(?, ?, ?, ?, ?, ?, ?, ?)", args)

    def _parse_asn_line(self, country_code, line):
        """Parse "asn|value|date|status[|org_id]" and insert the AS number.

        Only records with status "assigned" or "allocated" are stored.
        """
        fields = self._split_record(line)
        if fields is None:
            logging.warning("Could not parse line: %s", line)
            return

        # The second field is not used
        asn, _unused, date, status, org_id = fields

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Parse date
        try:
            date = datetime.datetime.strptime(date, "%Y%m%d")
        except ValueError:
            logging.warning("Could not parse date: %s", date)
            return

        args = (
            asn,
            country_code,
            org_id,
            date,
        )

        with self.db as c:
            c.execute("INSERT INTO autnums(asn, country, org_id, date) \
                VALUES(?, ?, ?, ?)", args)

    def _export_networks(self, f):
        """Write one block per network to the open text file *f*.

        Networks are joined to AS numbers via the organisation id. The
        join condition lives in the ON clause so that networks without a
        matching AS number are still exported (without an "asnum:" line).
        """
        # Write header
        self._write_header(f)

        with self.db as c:
            # Write all networks
            res = c.execute("""
                SELECT inetnums.network,
                    autnums.asn,
                    inetnums.address_start,
                    inetnums.country,
                    STRFTIME('%Y-%m-%d', inetnums.date)
                FROM inetnums
                LEFT JOIN autnums
                    ON inetnums.org_id = autnums.org_id
                ORDER BY inetnums.address_start
            """)

            for net, asn, _address_start, country, date in res:
                f.write(FMT % ("net:", net))

                if asn:
                    f.write(FMT % ("asnum:", "AS%s" % asn))

                if country:
                    f.write(FMT % ("country:", country))

                if date:
                    f.write(FMT % ("assigned:", date))

                # End the block
                f.write("\n")

    def _export_asnums(self, f):
        """Write one block per distinct AS number to the open text file *f*."""
        # Write header
        self._write_header(f)

        with self.db as c:
            res = c.execute("SELECT DISTINCT autnums.asn, autnums.country, \
                STRFTIME('%Y-%m-%d', autnums.date) FROM autnums ORDER BY autnums.asn")

            for asn, country, date in res:
                f.write(FMT % ("asnum:", "AS%s" % asn))

                if country:
                    f.write(FMT % ("country:", country))

                if date:
                    f.write(FMT % ("assigned:", date))

                # End block
                f.write("\n")
325 f.write("\n")