# Source: git.ipfire.org Git - location/location-database.git/blob - tools/downloader.py
2 ###############################################################################
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
21 ###############################################################################
# Module-level logger used by every class and by the script entry point below
log = logging.getLogger("downloader")
class Downloader(object):
    """
        Prepares HTTP requests that carry a common User-Agent header
        and, once a proxy has been configured, are sent through it.
    """

    USER_AGENT = "location-database/1.0"

    def __init__(self):
        # No proxy is used until set_proxy() has been called. Having the
        # attribute exist from the start keeps request() from failing with
        # an AttributeError on a fresh instance.
        self.proxy = None

    def set_proxy(self, url):
        """
            Sets a HTTP proxy that is used to perform all requests
        """
        log.info("Using proxy %s", url)

        # Remember the proxy so that request() can apply it
        self.proxy = url

    def request(self, url, data=None, return_blocks=True):
        """
            Builds a urllib request for url and wraps it into
            a DownloaderContext which is returned to the caller.

            data is handed to urllib.request.Request unchanged and
            return_blocks is passed on to the DownloaderContext.
        """
        req = urllib.request.Request(url, data=data)

        # Configure the proxy only when one has been set; passing None
        # to Request.set_proxy() would overwrite the request's host
        if self.proxy:
            req.set_proxy(self.proxy, "http")

        # Set User-Agent
        req.add_header("User-Agent", self.USER_AGENT)

        return DownloaderContext(self, req, return_blocks=return_blocks)
class DownloaderContext(object):
    """
        Context manager around one prepared request.

        Entering the context sends the request. The object can then be
        iterated to read the response either block by block or line by
        line, depending on return_blocks.
    """

    def __init__(self, downloader, request, return_blocks=True):
        self.downloader = downloader
        self.request = request

        # Should we return one block or a single line?
        self.return_blocks = return_blocks

        # Save the response object (set when the context is entered)
        self.response = None

    def __enter__(self):
        log.info("Retrieving %s...", self.request.full_url)

        # Send request
        self.response = urllib.request.urlopen(self.request)

        # Log the response headers
        log.debug("Response Headers:")
        for header in self.headers:
            log.debug(" %s: %s", header, self.get_header(header))

        return self

    def __exit__(self, type, value, traceback):
        # Close the response so the connection is not left open once the
        # with-block has ended. Nothing is returned, so any exception
        # raised inside the block keeps propagating.
        if self.response is not None:
            self.response.close()

    def __iter__(self):
        """
            Makes the object iterable by going through each block
        """
        if self.return_blocks:
            yield from util.iterate_over_blocks(self.body)
            return

        # Fetch the body once; the property may build a new
        # decompressing wrapper on every access
        body = self.body

        while True:
            line = body.readline()

            # An empty read marks the end of the response
            if not line:
                break

            # NOTE(review): decoding the line and stripping trailing
            # whitespace is an assumption about what callers expect in
            # line mode - confirm against the users of this class.
            yield line.decode().rstrip()

    @property
    def headers(self):
        """
            Returns the headers of the response or None
            if no response has been received, yet.
        """
        if self.response is None:
            return None

        return self.response.headers

    def get_header(self, name):
        """
            Returns the value of the response header called name
            or None if there is no response or no such header.
        """
        headers = self.headers
        if headers is None:
            return None

        return headers.get(name)

    @property
    def body(self):
        """
            Returns a file-like object with the decoded content
            of the response.
        """
        content_type = self.get_header("Content-Type")

        # Decompress any gzipped response on the fly
        if content_type in ("application/x-gzip", "application/gzip"):
            return gzip.GzipFile(fileobj=self.response, mode="rb")

        # Return the response by default
        return self.response
136 if __name__
== "__main__":
139 # Enable debug logging
140 logging
.basicConfig(level
=logging
.DEBUG
)
144 for url
in sys
.argv
[1:]:
145 print("Downloading %s..." % url
)
147 with d
.request(url
) as r
: