# Source: git.ipfire.org — people/ms/libloc.git — src/python/importer.py
# Commit subject: "importer.py: add source information for RIR data feeds"
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import gzip
21 import logging
22 import urllib.request
23
# Initialise logging
# Module-level logger; propagation is enabled so records reach the
# handlers configured on the parent "location" logger.
log = logging.getLogger("location.importer")
log.propagate = True
27
# WHOIS database dumps to import, keyed by the Regional Internet Registry
# that publishes them. Each value is a list of URLs which are downloaded
# and parsed as RPSL key/value blocks (see read_blocks below).
WHOIS_SOURCES = {
	# African Network Information Centre
	"AFRINIC": [
		"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"
	],

	# Asia Pacific Network Information Centre
	"APNIC": [
		"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
		"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
		#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
		#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
		"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
		"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"
	],

	# American Registry for Internet Numbers
	# XXX there is nothing useful for us in here
	# ARIN: [
	#	"https://ftp.arin.net/pub/rr/arin.db"
	# ],

	# Latin America and Caribbean Network Information Centre
	# XXX ???

	# Réseaux IP Européens
	"RIPE": [
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
		#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
		#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"
	],
}
63
# "Delegated extended" statistics files to import, keyed by the RIR that
# publishes them. Used for registries whose WHOIS dumps (above) are not
# usable for us.
EXTENDED_SOURCES = {
	# African Network Information Centre
	# NOTE(review): the key in this disabled entry read "ARIN", but the
	# URL belongs to AFRINIC — corrected here while it stays commented out.
	# "AFRINIC": [
	#	"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
	# ],

	# Asia Pacific Network Information Centre
	# "APNIC": [
	#	"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest"
	# ],

	# American Registry for Internet Numbers
	"ARIN": [
		"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
	],

	# Latin America and Caribbean Network Information Centre
	"LACNIC": [
		"https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
	],

	# Réseaux IP Européens
	# "RIPE": [
	#	"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
	# ],
}
90
class Downloader(object):
	"""
	Creates HTTP requests for the data sources and hands them to a
	DownloaderContext which performs the actual transfer.
	"""
	def __init__(self):
		# No proxy is used unless set_proxy() is called
		self.proxy = None

	def set_proxy(self, url):
		"""
		Sets a HTTP proxy that is used to perform all requests
		"""
		log.info("Using proxy %s" % url)
		self.proxy = url

	def request(self, url, data=None, return_blocks=False):
		"""
		Builds a request for url (POSTing data if given) and returns a
		DownloaderContext for it. return_blocks selects whether iterating
		the context yields whole RPSL blocks or single lines.
		"""
		req = urllib.request.Request(url, data=data)

		# Configure proxy
		if self.proxy:
			# Register the proxy for both schemes: set_proxy() only
			# applies to requests whose URL scheme matches the given
			# type, and all of our sources are https:// URLs, so
			# registering "http" alone would silently bypass the proxy.
			for scheme in ("http", "https"):
				req.set_proxy(self.proxy, scheme)

		return DownloaderContext(self, req, return_blocks=return_blocks)
110
111
class DownloaderContext(object):
	"""
	Context manager around a single HTTP request: the download starts
	on __enter__, the response is released on __exit__, and the object
	can be iterated to walk the decoded payload line by line or block
	by block.
	"""
	def __init__(self, downloader, request, return_blocks=False):
		self.downloader = downloader
		self.request = request

		# Should we return one block or a single line?
		self.return_blocks = return_blocks

		# Save the response object
		self.response = None

	def __enter__(self):
		log.info("Retrieving %s..." % self.request.full_url)

		# Send request
		self.response = urllib.request.urlopen(self.request)

		# Log the response headers
		log.debug("Response Headers:")
		for header in self.headers:
			log.debug(" %s: %s" % (header, self.get_header(header)))

		return self

	def __exit__(self, exc_type, exc_value, traceback):
		# Close the response so the underlying connection is released
		# (previously this was a no-op and leaked the connection)
		if self.response:
			self.response.close()

	def __iter__(self):
		"""
		Makes the object iterable by going through each block
		"""
		if self.return_blocks:
			return iterate_over_blocks(self.body)

		return iterate_over_lines(self.body)

	@property
	def headers(self):
		# Headers of the HTTP response; None before __enter__ has run
		if self.response:
			return self.response.headers

	def get_header(self, name):
		"""
		Returns the value of the named response header, or None if the
		header is absent or no response has been received yet.
		"""
		if self.headers:
			return self.headers.get(name)

	@property
	def body(self):
		"""
		Returns a file-like object with the decoded content
		of the response.
		"""
		content_type = self.get_header("Content-Type")

		# Decompress any gzipped response on the fly
		if content_type in ("application/x-gzip", "application/gzip"):
			return gzip.GzipFile(fileobj=self.response, mode="rb")

		# Return the response by default
		return self.response
171
172
def read_blocks(f):
	"""
	Parses f into RPSL blocks and yields a (type, attributes) tuple for
	each one, where the type is the key of the block's first line and
	the attributes map every key to its whitespace-stripped value.
	"""
	for block in iterate_over_blocks(f):
		block_type = None
		attrs = {}

		for index, line in enumerate(block):
			key, value = line.split(":", 1)

			# The key of the first line defines the type
			if not index:
				block_type = key

			# Store value
			attrs[key] = value.strip()

		yield block_type, attrs
189
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
	"""
	Reads byte lines from the file-like object f and yields lists of
	decoded lines ("blocks") that were separated by empty lines.
	Comment lines (starting with # or %) and inline comments after a
	"#" are removed.

	charsets are tried in order until one decodes the line (latin1
	accepts any byte sequence, so it acts as a catch-all).
	"""
	block = []

	for line in f:
		# Convert to string
		for charset in charsets:
			try:
				line = line.decode(charset)
			except UnicodeDecodeError:
				continue
			else:
				break

		# Skip commented lines
		if line.startswith("#") or line.startswith("%"):
			continue

		# Strip line-endings
		line = line.rstrip()

		# Remove any comments at the end of line
		line, sep, comment = line.partition("#")

		# Checking for a found "#" (not for non-empty comment text as
		# before) also strips lines that end in a bare "#", which used
		# to keep trailing whitespace or survive as a whitespace line
		if sep:
			# Strip any whitespace before the comment
			line = line.rstrip()

			# If the line is now empty, we move on
			if not line:
				continue

		if line:
			block.append(line)
			continue

		# End the block on an empty line
		if block:
			yield block

			# Reset the block
			block = []

	# Return the last block
	if block:
		yield block
235
236
def iterate_over_lines(f):
	"""
	Yields every line of f decoded (UTF-8) with trailing whitespace
	and line-endings removed.
	"""
	for raw in f:
		# Decode and strip the line-ending in one go
		yield raw.decode().rstrip()