]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/export.py
10cf2b79bcafad99f9e7d61af431e23d855ab774
[people/ms/libloc.git] / src / python / export.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import io
21 import ipaddress
22 import logging
23 import math
24 import os
25 import socket
26
27 import _location
28
# Logger used by everything in this module
log = logging.getLogger("location.export")
log.propagate = True

# Maps each network flag to the two-letter pseudo country code under which
# networks carrying that flag are exported
FLAGS = {
    _location.NETWORK_FLAG_ANONYMOUS_PROXY: "A1",
    _location.NETWORK_FLAG_SATELLITE_PROVIDER: "A2",
    _location.NETWORK_FLAG_ANYCAST: "A3",
    _location.NETWORK_FLAG_DROP: "XD",
}
39
class OutputWriter(object):
    """
        Writes networks to a file object, one network per line.

        This is the base class for all output formats. Subclasses customise
        the result by overriding init(), _write_header(), _write_footer()
        and write().
    """
    # Used by callers to build the filename for this format
    suffix = "networks"

    # Mode in which open() opens the output file
    mode = "w"

    def __init__(self, f, family=None, prefix=None):
        """
            f is the (already opened) file object to write to, family and
            prefix are stored for use by the subclasses.
        """
        self.f = f
        self.prefix = prefix
        self.family = family

        # Call any custom initialization
        self.init()

        # Immediately write the header
        self._write_header()

    def init(self):
        """
            To be overwritten by anything that inherits from this
        """
        pass

    @classmethod
    def open(cls, filename, *args, **kwargs):
        """
            Convenience function to open a file

            Opens filename using the mode of the class and passes any further
            arguments on to the constructor.
        """
        f = open(filename, cls.mode)

        # The constructor runs init() and writes the header. If any of that
        # fails, nobody else holds a reference to the file, so close it here.
        try:
            return cls(f, *args, **kwargs)
        except BaseException:
            f.close()
            raise

    def __repr__(self):
        return "<%s f=%s>" % (self.__class__.__name__, self.f)

    def _write_header(self):
        """
            The header of the file
        """
        pass

    def _write_footer(self):
        """
            The footer of the file
        """
        pass

    def write(self, network):
        """
            Writes the string representation of network followed by a newline
        """
        self.f.write("%s\n" % network)

    def finish(self):
        """
            Called when all data has been written

            Writes the footer and closes the file. The file is closed even
            if writing the footer fails.
        """
        try:
            self._write_footer()
        finally:
            # Close the file
            self.f.close()
96
97
class IpsetOutputWriter(OutputWriter):
    """
        For ipset

        Writes a "create" and a "flush" command followed by one "add" command
        per network. The header is written twice: once with placeholder
        values when the file is opened and again, in place, when all networks
        have been counted.
    """
    suffix = "ipset"

    # The value is being used if we don't know any better
    DEFAULT_HASHSIZE = 64

    # We aim for this many networks in a bucket on average. This allows us to choose
    # how much memory we want to sacrifice to gain better performance. The lower the
    # factor, the faster a lookup will be, but it will use more memory.
    # We will aim for only using three quarters of all buckets to avoid any searches
    # through the linked lists.
    HASHSIZE_FACTOR = 0.75

    def init(self):
        # Count all networks
        self.networks = 0

    @property
    def hashsize(self):
        """
            Calculates an optimized hashsize

            The result is always a power of two and never smaller than
            DEFAULT_HASHSIZE.
        """
        # Return the default value if we don't know the size of the set
        if not self.networks:
            return self.DEFAULT_HASHSIZE

        # Find the nearest power of two that is larger than the number of networks
        # divided by the hashsize factor.
        # log2() is exact for powers of two, whereas log(x, 2) may end up slightly
        # above the true value which would make ceil() double the hash size.
        exponent = math.log2(self.networks / self.HASHSIZE_FACTOR)

        # Return the size of the hash, but never go below the default since
        # very small sets gain nothing from an even smaller table
        return max(2 ** math.ceil(exponent), self.DEFAULT_HASHSIZE)

    @property
    def maxelem(self):
        """
            Tells ipset how large the set will be.

            Since these are considered immutable, we will use the total number of networks.
        """
        return self.networks

    def _write_header(self):
        # This must have a fixed size, because we will write the header again in the end.
        # The numbers are padded to eight characters which keeps the length of the
        # header constant as long as neither value needs more than eight digits.
        self.f.write("create %s hash:net family inet%s" % (
            self.prefix,
            "6" if self.family == socket.AF_INET6 else ""
        ))
        self.f.write(" hashsize %8d maxelem %8d -exist\n" % (self.hashsize, self.maxelem))
        self.f.write("flush %s\n" % self.prefix)

    def write(self, network):
        self.f.write("add %s %s\n" % (self.prefix, network))

        # Increment network counter
        self.networks += 1

    def _write_footer(self):
        # Jump back to the beginning of the file
        self.f.seek(0)

        # Rewrite the header with better configuration
        self._write_header()
164
165
class NftablesOutputWriter(OutputWriter):
    """
        For nftables

        Wraps all networks into a single "define" statement named
        after the prefix.
    """
    suffix = "set"

    def _write_header(self):
        # Open the definition
        opening = "define %s = {\n" % self.prefix
        self.f.write(opening)

    def write(self, network):
        # Every element goes on a line of its own, followed by a comma
        element = " %s,\n" % network
        self.f.write(element)

    def _write_footer(self):
        # Close the definition
        closing = "}\n"
        self.f.write(closing)
180
181
class XTGeoIPOutputWriter(OutputWriter):
    """
        Produces output that can be loaded by the xt_geoip kernel
        module from xtables-addons.
    """
    suffix = "iv"

    # The output file is opened in binary mode
    mode = "wb"

    def write(self, network):
        out = self.f

        # Each network is stored as its first address immediately
        # followed by its last address
        out.write(network._first_address)
        out.write(network._last_address)
193
194
# Maps the name of each supported output format to the class that writes it
formats = {
    "ipset": IpsetOutputWriter,
    "list": OutputWriter,
    "nftables": NftablesOutputWriter,
    "xt_geoip": XTGeoIPOutputWriter,
}
201
class Exporter(object):
    """
        Exports networks from a database into one file per requested
        country code or AS number and address family.
    """
    def __init__(self, db, writer):
        """
            db is the database that is being searched for networks and
            writer is the writer class that is used to create the files.
        """
        self.db, self.writer = db, writer

    def export(self, directory, families, countries, asns):
        """
            Writes all matching networks into files in directory.

            For every family, one file is created for each country code in
            countries and for each AS number in asns. Country codes may also
            be any of the special codes from FLAGS.

            If anything fails, all files of the family that is being
            processed are closed before the exception is passed on.
        """
        # Special country codes which are being represented by flags
        special_codes = set(FLAGS.values())

        for family in families:
            log.debug("Exporting family %s", family)

            writers = {}

            try:
                # Create writers for countries
                for country_code in countries:
                    filename = self._make_filename(
                        directory, prefix=country_code, suffix=self.writer.suffix, family=family,
                    )

                    writers[country_code] = self.writer.open(filename, family, prefix="%s" % country_code)

                # Create writers for ASNs
                for asn in asns:
                    filename = self._make_filename(
                        directory, prefix="AS%s" % asn, suffix=self.writer.suffix, family=family,
                    )

                    writers[asn] = self.writer.open(filename, family, prefix="AS%s" % asn)

                # Filter countries from special country codes
                country_codes = [
                    country_code for country_code in countries if country_code not in special_codes
                ]

                # Get all networks that match the family
                networks = self.db.search_networks(family=family,
                    country_codes=country_codes, asns=asns, flatten=True)

                # Walk through all networks
                for network in networks:
                    self._write_network(writers, network)

                # Write everything to the filesystem
                for writer in writers.values():
                    writer.finish()

            except BaseException:
                # Do not leak any open files. The footers are deliberately not
                # written so that incomplete files do not look complete.
                for writer in writers.values():
                    writer.f.close()

                raise

    @staticmethod
    def _write_network(writers, network):
        """
            Passes network to every writer it belongs to
        """
        def write_to(key):
            # Looking up the writer first makes sure that only networks nobody
            # asked for are skipped, but no errors raised while writing
            writer = writers.get(key)
            if writer is not None:
                writer.write(network)

        # Write matching countries
        write_to(network.country_code)

        # Write matching ASNs
        write_to(network.asn)

        # Handle flags
        for flag in FLAGS:
            if network.has_flag(flag):
                # Write to the "fake" country code
                write_to(FLAGS[flag])

    def _make_filename(self, directory, prefix, suffix, family):
        """
            Returns the path "<directory>/<prefix>.<suffix><4 or 6>"
        """
        filename = "%s.%s%s" % (
            prefix, suffix, "6" if family == socket.AF_INET6 else "4"
        )

        return os.path.join(directory, filename)
269 )
270
271 return os.path.join(directory, filename)