]> git.ipfire.org Git - people/ms/libloc.git/blame - src/python/export.py
export: Flatten the tree before exporting it
[people/ms/libloc.git] / src / python / export.py
CommitLineData
88ef7e9c
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
6# Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import io
21import ipaddress
22import logging
23import os
24import socket
25
fae36e81
MT
26import _location
27
88ef7e9c
MT
# Initialise logging
log = logging.getLogger("location.export")
# Hand all records on to the handlers of the parent loggers as well
log.propagate = 1
31
fae36e81
MT
# Maps network flags to the "fake" country codes under which all
# networks that carry the flag are exported
flags = {
    _location.NETWORK_FLAG_ANONYMOUS_PROXY : "A1",
    _location.NETWORK_FLAG_SATELLITE_PROVIDER : "A2",
    _location.NETWORK_FLAG_ANYCAST : "A3",
}
37
88ef7e9c
MT
class OutputWriter(object):
    """
        Writes networks to a file object, one network per line.

        Subclasses change the output format by overriding
        _write_header(), _write_footer() and _write_network() and
        by setting "suffix" (used for the filename) and "mode"
        (used to open the file).
    """
    suffix = "networks"
    mode = "w"

    def __init__(self, db, f, prefix=None, flatten=True):
        self.db, self.f, self.prefix, self.flatten = db, f, prefix, flatten

        # The previously written network
        self._last_network = None

        # Immediately write the header
        self._write_header()

    @classmethod
    def open(cls, db, filename, **kwargs):
        """
            Convenience function to open a file

            Any further keyword arguments are passed to the constructor.
        """
        f = open(filename, cls.mode)

        try:
            return cls(db, f, **kwargs)

        # Do not leak the file handle if the writer could not be
        # set up (e.g. because writing the header failed)
        except Exception:
            f.close()
            raise

    def __repr__(self):
        return "<%s f=%s>" % (self.__class__.__name__, self.f)

    def _flatten(self, network):
        """
            Checks if the given network needs to be written to file,
            or if it is a subnet of the previously written network.

            Returns True if the network can be skipped.
        """
        if self._last_network and network.is_subnet_of(self._last_network):
            return True

        # Remember this network for the next call
        self._last_network = network
        return False

    def _write_header(self):
        """
            The header of the file
        """
        pass

    def _write_footer(self):
        """
            The footer of the file
        """
        pass

    def _write_network(self, network):
        """
            Writes a single network to the file
        """
        self.f.write("%s\n" % network)

    def write(self, network, subnets):
        """
            Writes network to the file, but leaves out all address
            space that is covered by any of the given subnets.

            Both network and the subnets are converted by formatting
            them as strings and parsing the result with the ipaddress
            module. When flattening is enabled, network is skipped
            entirely if it is a subnet of the previously written one.
        """
        if self.flatten and self._flatten(network):
            # Pass the arguments to the logger so that the message is
            # only formatted when debug logging is actually enabled
            log.debug("Skipping writing network %s (last one was %s)",
                network, self._last_network)
            return

        # Convert network into a Python object
        _network = ipaddress.ip_network("%s" % network)

        # Write the network when it has no subnets
        if not subnets:
            log.debug("Writing %s to %s", _network, self.f)
            return self._write_network(_network)

        # Convert subnets into Python objects
        _subnets = [ipaddress.ip_network("%s" % subnet) for subnet in subnets]

        # Split the network into smaller bits so that
        # we can accommodate for any gaps in it later
        to_check = set()
        for _subnet in _subnets:
            to_check.update(
                _network.address_exclude(_subnet)
            )

        # Collects everything that does not collide with any subnet
        remaining = []

        # Check that none of the bits overlaps with any of the subnets
        while to_check:
            subnet_to_check = to_check.pop()

            for _subnet in _subnets:
                # Drop this subnet if it equals one of the subnets
                # or if it is subnet of one of them
                if subnet_to_check == _subnet or subnet_to_check.subnet_of(_subnet):
                    break

                # Break it down if it overlaps and check the pieces again
                if subnet_to_check.overlaps(_subnet):
                    to_check.update(
                        subnet_to_check.address_exclude(_subnet)
                    )
                    break

            # Keep the subnet as it passed the check against all subnets
            else:
                remaining.append(subnet_to_check)

        # Write all networks as compact as possible
        for network in ipaddress.collapse_addresses(remaining):
            log.debug("Writing %s to %s", network, self.f)
            self._write_network(network)

    def finish(self):
        """
            Called when all data has been written
        """
        try:
            self._write_footer()

        # Close the file, even if the footer could not be written
        finally:
            self.f.close()
151
152
class IpsetOutputWriter(OutputWriter):
    """
        Writes the networks as a list of commands for ipset
    """
    suffix = "ipset"

    def _write_header(self):
        # The set is named after the prefix of this writer
        # NOTE(review): the set is always created with "family inet" -
        # confirm that this is intended when IPv6 networks are exported
        header = "create %s hash:net family inet hashsize 1024 maxelem 65536\n" % self.prefix

        self.f.write(header)

    def _write_network(self, network):
        # Every network becomes a member of the set
        command = "add %s %s\n" % (self.prefix, network)

        self.f.write(command)
164
165
class NftablesOutputWriter(OutputWriter):
    """
        Writes the networks as a "define" statement for nftables
    """
    suffix = "set"

    def _write_header(self):
        # Opens the definition which is named after the prefix
        opening = "define %s = {\n" % self.prefix

        self.f.write(opening)

    def _write_footer(self):
        # Closes the definition again
        closing = "}\n"

        self.f.write(closing)

    def _write_network(self, network):
        # One element per line, each followed by a comma
        element = " %s,\n" % network

        self.f.write(element)
180
181
class XTGeoIPOutputWriter(OutputWriter):
    """
        Writes the networks in a binary format that can be loaded
        by the xt_geoip kernel module from xtables-addons.
    """
    suffix = "iv"
    mode = "wb"

    def _write_network(self, network):
        # Both addresses of the network belong to the same family
        if network.version == 6:
            family = socket.AF_INET6
        else:
            family = socket.AF_INET

        # Write the first and the last address of the network
        for address in (network.network_address, network.broadcast_address):
            # Convert the address into its packed binary representation
            packed = socket.inet_pton(family, "%s" % address)

            self.f.write(packed)
198
199
# Maps the name of every supported output format to its writer class
formats = {
    "ipset" : IpsetOutputWriter,
    "list" : OutputWriter,
    "nftables" : NftablesOutputWriter,
    "xt_geoip" : XTGeoIPOutputWriter,
}
206
class Exporter(object):
    """
        Exports networks from the database into one file for every
        requested country, ASN and address family using the given
        writer class.
    """
    def __init__(self, db, writer):
        self.db, self.writer = db, writer

    def export(self, directory, families, countries, asns):
        """
            Writes one file per country code and ASN into directory,
            once for each of the given address families.
        """
        for family in families:
            log.debug("Exporting family %s" % family)

            # All writers, indexed by country code or ASN
            writers = {}

            # Open one writer for each country
            for country_code in countries:
                path = self._make_filename(
                    directory, prefix=country_code, suffix=self.writer.suffix, family=family,
                )

                writers[country_code] = self.writer.open(
                    self.db, path, prefix="CC_%s" % country_code,
                )

            # Open one writer for each ASN
            for asn in asns:
                path = self._make_filename(
                    directory, "AS%s" % asn, suffix=self.writer.suffix, family=family,
                )

                writers[asn] = self.writer.open(self.db, path, prefix="AS%s" % asn)

            # Fetch all networks of this family and wrap them so that
            # networks can be put back and will be returned again
            networks = BufferedStack(
                self.db.search_networks(family=family),
            )

            # Walk through all networks
            for network in networks:
                # Take everything off the stack that is a subnet of network
                children = []
                for candidate in networks:
                    if candidate.is_subnet_of(network):
                        children.append(candidate)
                        continue

                    # This one is not a subnet any more, so it goes
                    # back and will be processed in the next round
                    networks.push(candidate)
                    break

                # Write matching countries
                if network.country_code and network.country_code in writers:
                    # Subnets that belong to a different country
                    # NOTE(review): a subnet with a matching country that lies
                    # inside one of these gaps looks like it is skipped later
                    # by the flatten check of the writer - confirm
                    mismatches = [
                        child for child in children
                        if network.country_code != child.country_code
                    ]

                    writers[network.country_code].write(network, mismatches)

                # Write matching ASNs
                if network.asn and network.asn in writers:
                    # Subnets that belong to a different ASN
                    mismatches = [
                        child for child in children if network.asn != child.asn
                    ]

                    writers[network.asn].write(network, mismatches)

                # Handle flags, which are exported under a "fake" country code
                for flag, country in flags.items():
                    if not network.has_flag(flag):
                        continue

                    if country not in writers:
                        continue

                    # Subnets that do not carry the flag
                    mismatches = [
                        child for child in children if not child.has_flag(flag)
                    ]

                    writers[country].write(network, mismatches)

                # Put all subnets back in their original order
                # so that each of them is processed on its own
                for child in reversed(children):
                    networks.push(child)

            # Write everything to the filesystem
            for writer in writers.values():
                writer.finish()

    def _make_filename(self, directory, prefix, suffix, family):
        """
            Returns the path of the file inside directory, which is named
            "<prefix>.<suffix>" followed by "6" for IPv6 and "4" otherwise.
        """
        version = "6" if family == socket.AF_INET6 else "4"

        return os.path.join(directory, "%s.%s%s" % (prefix, suffix, version))
bbed1fd2
MT
301
302
class BufferedStack(object):
    """
        This class takes an iterator and when being iterated
        over it returns objects from that iterator for as long
        as there are any.

        It additionally has a function to put an item back on
        the stack so that it will be returned again at the next
        iteration. The item that was put back last is returned
        first.
    """
    def __init__(self, iterator):
        self.iterator = iterator

        # The top of the stack is the end of the list, so that
        # pushing and popping do not have to move any other items
        self.stack = []

    def __iter__(self):
        return self

    def __next__(self):
        # Return anything that has been put back first
        if self.stack:
            return self.stack.pop()

        # Raises StopIteration when the iterator is exhausted
        return next(self.iterator)

    def push(self, elem):
        """
            Takes an element and puts it on the stack
        """
        self.stack.append(elem)