]> git.ipfire.org Git - people/ms/libloc.git/blame - src/python/export.py
export: Conditionally enable flattening
[people/ms/libloc.git] / src / python / export.py
CommitLineData
88ef7e9c
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
e17e804e 6# Copyright (C) 2020-2021 IPFire Development Team <info@ipfire.org> #
88ef7e9c
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import io
21import ipaddress
22import logging
47de14b0 23import math
88ef7e9c
MT
24import os
25import socket
26
fae36e81
MT
27import _location
28
88ef7e9c
MT
# Module-wide logger; records are handed on to the handlers of the parent loggers
log = logging.getLogger("location.export")
log.propagate = 1
32
# Maps each network flag to the "fake" country code under which
# networks carrying that flag are exported
FLAGS = dict((
    (_location.NETWORK_FLAG_ANONYMOUS_PROXY, "A1"),
    (_location.NETWORK_FLAG_SATELLITE_PROVIDER, "A2"),
    (_location.NETWORK_FLAG_ANYCAST, "A3"),
    (_location.NETWORK_FLAG_DROP, "XD"),
))
39
88ef7e9c
MT
class OutputWriter(object):
    """
        Writes networks to a file object, one network per line.

        Other output formats inherit from this class and overwrite
        init(), _write_header(), _write_footer() and write().
    """
    suffix = "networks"

    # The mode in which open() opens the file
    mode = "w"

    # Enable network flattening (i.e. networks cannot overlap)
    flatten = False

    def __init__(self, f, family=None, prefix=None):
        """
            Takes an open file object f and immediately writes the header to it.

            family and prefix are only stored; this class does not use them itself.
        """
        self.f = f
        self.prefix = prefix
        self.family = family

        # Call any custom initialization
        self.init()

        # Immediately write the header
        self._write_header()

    def init(self):
        """
            To be overwritten by anything that inherits from this
        """
        pass

    @classmethod
    def open(cls, filename, *args, **kwargs):
        """
            Convenience function to open a file

            All further arguments are passed on to the constructor.
        """
        f = open(filename, cls.mode)

        # Do not leak the file handle if the writer could not be
        # set up (e.g. because writing the header has failed)
        try:
            return cls(f, *args, **kwargs)
        except BaseException:
            f.close()
            raise

    def __repr__(self):
        return "<%s f=%s>" % (self.__class__.__name__, self.f)

    def _write_header(self):
        """
            The header of the file
        """
        pass

    def _write_footer(self):
        """
            The footer of the file
        """
        pass

    def write(self, network):
        """
            Writes the string representation of network followed by a newline
        """
        self.f.write("%s\n" % network)

    def finish(self):
        """
            Called when all data has been written
        """
        self._write_footer()

        # Close the file
        self.f.close()
99
100
class IpsetOutputWriter(OutputWriter):
    """
        For ipset

        The header is written twice: once when the file is opened, and once
        more by the footer hook, which seeks back to the start of the file
        when the number of networks is known.
    """
    suffix = "ipset"

    # The value is being used if we don't know any better
    DEFAULT_HASHSIZE = 64

    # We aim for this many networks in a bucket on average. This allows us to choose
    # how much memory we want to sacrifice to gain better performance. The lower the
    # factor, the faster a lookup will be, but it will use more memory.
    # We will aim for only using three quarters of all buckets to avoid any searches
    # through the linked lists.
    HASHSIZE_FACTOR = 0.75

    def init(self):
        # Count all networks
        self.networks = 0

    @property
    def hashsize(self):
        """
            Calculates an optimized hashsize

            The result is a power of two and never smaller than DEFAULT_HASHSIZE.
        """
        # Return the default value if we don't know the size of the set
        if not self.networks:
            return self.DEFAULT_HASHSIZE

        # Find the nearest power of two that is larger than the number of networks
        # divided by the hashsize factor. log2() is used instead of log(x, 2),
        # because the latter can be slightly off for exact powers of two which
        # would make ceil() round up to the next exponent.
        exponent = math.ceil(math.log2(self.networks / self.HASHSIZE_FACTOR))

        # Small sets must not end up with a smaller table than an empty one
        return max(2 ** exponent, self.DEFAULT_HASHSIZE)

    def _write_header(self):
        # This must have a fixed size, because we will write the header again in the end
        self.f.write("create %s hash:net family inet%s" % (
            self.prefix,
            "6" if self.family == socket.AF_INET6 else ""
        ))
        self.f.write(" hashsize %8d maxelem 1048576 -exist\n" % self.hashsize)
        self.f.write("flush %s\n" % self.prefix)

    def write(self, network):
        """
            Writes an "add" command for network and counts it
        """
        self.f.write("add %s %s\n" % (self.prefix, network))

        # Increment network counter
        self.networks += 1

    def _write_footer(self):
        # Jump back to the beginning of the file
        self.f.seek(0)

        # Rewrite the header with better configuration
        self._write_header()
158
88ef7e9c
MT
159
class NftablesOutputWriter(OutputWriter):
    """
        For nftables

        Emits a "define" statement that lists all networks
        between curly brackets.
    """
    suffix = "set"

    def _write_header(self):
        # Open the definition which is named after the prefix
        opening = "define %s = {\n" % self.prefix
        self.f.write(opening)

    def write(self, network):
        # One element per line, each followed by a comma
        element = " %s,\n" % network
        self.f.write(element)

    def _write_footer(self):
        # Close the definition
        closing = "}\n"
        self.f.write(closing)
174
175
class XTGeoIPOutputWriter(OutputWriter):
    """
        Formats the output in that way, that it can be loaded by
        the xt_geoip kernel module from xtables-addons.
    """
    suffix = "iv"

    # The file is opened in binary mode
    mode = "wb"

    # Ask for flattened (i.e. non-overlapping) networks
    flatten = True

    def write(self, network):
        """
            Writes the first and then the last address of network
        """
        emit = self.f.write

        emit(network._first_address)
        emit(network._last_address)
88ef7e9c
MT
188
189
# Maps the name of each supported output format to the class that writes it
formats = dict(
    ipset=IpsetOutputWriter,
    list=OutputWriter,
    nftables=NftablesOutputWriter,
    xt_geoip=XTGeoIPOutputWriter,
)
196
class Exporter(object):
    """
        Exports networks from a database into one file per requested
        country code and ASN using the given writer class.
    """
    def __init__(self, db, writer):
        self.db, self.writer = db, writer

    def export(self, directory, families, countries, asns):
        """
            Writes one file per country code and per ASN into directory
            for every address family in families.

            Networks for which no file has been requested are skipped.
        """
        # The "fake" country codes are not passed on to the database search
        special_codes = frozenset(FLAGS.values())

        for family in families:
            log.debug("Exporting family %s", family)

            writers = {}

            # Create writers for countries
            for country_code in countries:
                filename = self._make_filename(
                    directory, prefix=country_code, suffix=self.writer.suffix, family=family,
                )

                writers[country_code] = self.writer.open(filename, family, prefix="%s" % country_code)

            # Create writers for ASNs
            for asn in asns:
                filename = self._make_filename(
                    directory, "AS%s" % asn, suffix=self.writer.suffix, family=family,
                )

                writers[asn] = self.writer.open(filename, family, prefix="AS%s" % asn)

            # Filter countries from special country codes
            country_codes = [
                country_code for country_code in countries if country_code not in special_codes
            ]

            # Get all networks that match the family
            networks = self.db.search_networks(family=family,
                country_codes=country_codes, asns=asns, flatten=self.writer.flatten)

            # Walk through all networks
            for network in networks:
                self._write_network(writers, network)

            # Write everything to the filesystem
            for writer in writers.values():
                writer.finish()

    @staticmethod
    def _write_network(writers, network):
        """
            Passes network to every writer it belongs to: the one of its country,
            the one of its ASN and the ones of all flags that are set.
        """
        keys = [network.country_code, network.asn]

        # Handle flags which are exported under their "fake" country code
        keys += [country for flag, country in FLAGS.items() if network.has_flag(flag)]

        for key in keys:
            # Only the lookup may fail silently. Any error that is raised
            # by the writer itself must not be swallowed.
            writer = writers.get(key)

            if writer is not None:
                writer.write(network)

    def _make_filename(self, directory, prefix, suffix, family):
        """
            Returns the path of the file for prefix in directory. The suffix
            is followed by "6" for IPv6 and by "4" for any other family.
        """
        filename = "%s.%s%s" % (
            prefix, suffix, "6" if family == socket.AF_INET6 else "4"
        )

        return os.path.join(directory, filename)
266 return os.path.join(directory, filename)