]> git.ipfire.org Git - location/libloc.git/blame - src/python/export.py
ipset: Optimise hash table size
[location/libloc.git] / src / python / export.py
CommitLineData
88ef7e9c
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
e17e804e 6# Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
88ef7e9c
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import io
21import ipaddress
22import logging
47de14b0 23import math
88ef7e9c
MT
24import os
25import socket
26
fae36e81
MT
27import _location
28
88ef7e9c
MT
# Module-level logger; records are handed on to the parent loggers' handlers
log = logging.getLogger("location.export")
log.propagate = True
32
# Maps each network flag to the pseudo country code under which flagged
# networks are exported (see Exporter.export)
FLAGS = dict((
    (_location.NETWORK_FLAG_ANONYMOUS_PROXY, "A1"),
    (_location.NETWORK_FLAG_SATELLITE_PROVIDER, "A2"),
    (_location.NETWORK_FLAG_ANYCAST, "A3"),
    (_location.NETWORK_FLAG_DROP, "XD"),
))
39
88ef7e9c
MT
class OutputWriter(object):
    """
    Writes networks to a file object, one network per line.

    This is the plain "list" format and the base class for the other
    formats. Subclasses customise the output by overriding init(),
    _write_header(), _write_footer() and write().

    Class attributes:
        suffix: the filename suffix used for files of this format
        mode:   the mode the output file is opened with by open()
    """
    suffix = "networks"
    mode = "w"

    def __init__(self, f, prefix=None):
        """
        Wraps the already opened file object f.

        prefix is an optional name for the set that is being written; the
        base class only stores it, subclasses use it in their output.
        """
        self.f, self.prefix = f, prefix

        # Call any custom initialization
        self.init()

        # Immediately write the header
        self._write_header()

    def init(self):
        """
        To be overwritten by anything that inherits from this
        """
        pass

    @classmethod
    def open(cls, filename, **kwargs):
        """
        Convenience function to open a file

        Opens filename using the mode of this class and returns a writer
        for it. Any keyword arguments are passed on to the constructor.
        """
        f = open(filename, cls.mode)

        # The constructor already writes the header. If that (or anything
        # else in there) fails, nobody else holds a reference to the file,
        # so it has to be closed here to avoid leaking the handle.
        try:
            return cls(f, **kwargs)
        except BaseException:
            f.close()
            raise

    def __repr__(self):
        return "<%s f=%s>" % (self.__class__.__name__, self.f)

    def _write_header(self):
        """
        The header of the file
        """
        pass

    def _write_footer(self):
        """
        The footer of the file
        """
        pass

    def write(self, network):
        """
        Writes the string representation of network followed by a newline
        """
        self.f.write("%s\n" % network)

    def finish(self):
        """
        Called when all data has been written

        Writes the footer and closes the file. The file is closed even if
        writing the footer raises an exception.
        """
        try:
            self._write_footer()
        finally:
            # Close the file
            self.f.close()
94
95
class IpsetOutputWriter(OutputWriter):
    """
    For ipset

    Writes a file with "create", "flush" and "add" commands for one
    hash:net set named after the prefix.

    The header is written twice: once when the writer is created (with
    default values) and once more in the end, when the number of networks
    and their address family are known. The second header overwrites the
    first one in place, which is why every variable field in it is padded
    to a fixed width.
    """
    suffix = "ipset"

    # The value is being used if we don't know any better
    DEFAULT_HASHSIZE = 64

    # We aim for this many networks in a bucket on average. This allows us to choose
    # how much memory we want to sacrifice to gain better performance. The lower the
    # factor, the faster a lookup will be, but it will use more memory.
    # We will aim for only using three quarters of all buckets to avoid any searches
    # through the linked lists.
    HASHSIZE_FACTOR = 0.75

    def init(self):
        # Count all networks
        self.networks = 0

        # The address family of the set. We are not told which family is being
        # exported, so IPv4 is assumed until the first IPv6 network is written.
        # NOTE: A set without any networks is therefore always created as "inet".
        self.family = "inet"

    @property
    def hashsize(self):
        """
        Calculates an optimized hashsize
        """
        # Return the default value if we don't know the size of the set
        if not self.networks:
            return self.DEFAULT_HASHSIZE

        # Find the nearest power of two that is larger than the number of networks
        # divided by the hashsize factor. log2() is exact for powers of two,
        # unlike log(x, 2) which may be off by one ULP and round up too far.
        exponent = math.ceil(math.log2(self.networks / self.HASHSIZE_FACTOR))

        # Return the size of the hash, but never go below the default
        return max(2 ** exponent, self.DEFAULT_HASHSIZE)

    @property
    def maxelem(self):
        """
        Tells ipset how large the set will be.

        Since these are considered immutable, we will use the total number of networks.
        """
        return self.networks

    def _write_header(self):
        # This must have a fixed size, because we will write the header again in the end.
        # "inet6" is the longest family name, hence the family is padded to five characters.
        self.f.write("create %s hash:net family %-5s "
            "hashsize %8d maxelem %8d -exist\n" % (
                self.prefix, self.family, self.hashsize, self.maxelem,
            ))
        self.f.write("flush %s\n" % self.prefix)

    def write(self, network):
        network = "%s" % network

        # Only IPv6 networks contain colons
        if ":" in network:
            self.family = "inet6"

        self.f.write("add %s %s\n" % (self.prefix, network))

        # Increment network counter
        self.networks += 1

    def _write_footer(self):
        # Jump back to the beginning of the file
        self.f.seek(0)

        # Rewrite the header with better configuration
        self._write_header()
159
88ef7e9c
MT
160
class NftablesOutputWriter(OutputWriter):
    """
    For nftables

    Wraps all networks into one "define <prefix> = { ... }" statement
    with one network per line.
    """
    suffix = "set"

    def _write_header(self):
        # Opens the definition
        opening = "define %s = {\n" % self.prefix
        self.f.write(opening)

    def write(self, network):
        # Every element is terminated by a comma, including the last one
        element = " %s,\n" % network
        self.f.write(element)

    def _write_footer(self):
        # Closes the definition
        closing = "}\n"
        self.f.write(closing)
175
176
class XTGeoIPOutputWriter(OutputWriter):
    """
    Formats the output in that way, that it can be loaded by
    the xt_geoip kernel module from xtables-addons.

    The file is opened in binary mode and consists of nothing but the
    first and last address of every network, one pair after the other.
    """
    suffix = "iv"
    mode = "wb"

    def write(self, network):
        emit = self.f.write

        # presumably both attributes hold the packed (binary) address as
        # bytes, since the file is opened in binary mode - TODO confirm
        emit(network._first_address)
        emit(network._last_address)
88ef7e9c
MT
188
189
# All supported export formats by name, and the writer class that implements them
formats = dict(
    ipset=IpsetOutputWriter,
    list=OutputWriter,
    nftables=NftablesOutputWriter,
    xt_geoip=XTGeoIPOutputWriter,
)
196
class Exporter(object):
    """
    Exports networks from a database into one file per country and per
    Autonomous System.

    db is the database that is searched for networks, and writer is the
    writer class (not an instance) that implements the output format.
    """
    def __init__(self, db, writer):
        self.db, self.writer = db, writer

    def export(self, directory, families, countries, asns):
        """
        Writes one file for every combination of address family and
        country code/ASN into directory.

        Country codes that are listed in FLAGS are "fake" countries which
        receive all networks that have the corresponding flag set.

        All files are closed when this method returns, even if an
        exception is raised on the way.
        """
        for family in families:
            log.debug("Exporting family %s", family)

            writers = {}

            try:
                # Create writers for countries
                for country_code in countries:
                    filename = self._make_filename(
                        directory, prefix=country_code, suffix=self.writer.suffix, family=family,
                    )

                    writers[country_code] = self.writer.open(filename, prefix="%s" % country_code)

                # Create writers for ASNs
                for asn in asns:
                    filename = self._make_filename(
                        directory, "AS%s" % asn, suffix=self.writer.suffix, family=family,
                    )

                    writers[asn] = self.writer.open(filename, prefix="AS%s" % asn)

                # Filter countries from special country codes
                special_codes = set(FLAGS.values())
                country_codes = [
                    country_code for country_code in countries if country_code not in special_codes
                ]

                # Get all networks that match the family
                networks = self.db.search_networks(family=family,
                    country_codes=country_codes, asns=asns, flatten=True)

                # Walk through all networks
                for network in networks:
                    self._write_network(writers, network)

                # Write everything to the filesystem
                for writer in writers.values():
                    writer.finish()

            finally:
                # Never leak any open files if something went wrong above.
                # Closing a file that has been closed by finish() is a no-op.
                for writer in writers.values():
                    writer.f.close()

    def _write_network(self, writers, network):
        """
        Writes network to every writer in writers that it belongs to
        """
        # Matching country first, then the matching ASN
        keys = [network.country_code, network.asn]

        # Followed by the "fake" country codes of all flags that are set
        keys += [country for flag, country in FLAGS.items() if network.has_flag(flag)]

        for key in keys:
            # Only the lookup may fail silently. Looking up the writer
            # separately makes sure that errors raised while writing are
            # not swallowed.
            writer = writers.get(key)

            if writer is not None:
                writer.write(network)

    def _make_filename(self, directory, prefix, suffix, family):
        """
        Returns the path of the output file in directory, which is named
        <prefix>.<suffix> followed by "6" for IPv6 and "4" for anything else.
        """
        filename = "%s.%s%s" % (
            prefix, suffix, "6" if family == socket.AF_INET6 else "4"
        )

        return os.path.join(directory, filename)
265
266 return os.path.join(directory, filename)