MAX_BUFFER_SIZE = 1 * 1024 * 1024 # 1 MiB
+supported_digest_algos = (
+ "blake2b512",
+)
+
+class UnsupportedDigestException(ValueError):
+ pass
+
class Uploads(base.Object):
def _get_upload(self, query, *args):
res = self.db.get(query, *args)
""", uuid,
)
- async def create(self, filename, size, owner=None):
+ async def create(self, filename, size, digest_algo, digest, owner=None):
builder = None
user = None
+ # Check if the digest algorithm is supported
+ if not digest_algo in supported_digest_algos:
+ raise UnsupportedDigestException(digest_algo)
+
+ # Check if the digest is set
+ elif not digest:
+ raise ValueError("Empty digest")
+
# Check uploader type
if isinstance(owner, builders.Builder):
builder = owner
user.check_storage_quota(size)
# Allocate a new temporary file
- async with self.backend.tempfile(delete=False) as f:
- upload = self._get_upload("""
- INSERT INTO
- uploads
- (
- filename,
- path,
- size,
- builder_id,
- user_id
- )
- VALUES
- (
- %s,
- %s,
- %s,
- %s,
- %s
- )
- RETURNING *""",
+ upload = self._get_upload("""
+ INSERT INTO
+ uploads
+ (
filename,
- f.name,
size,
- builder,
- user,
+ builder_id,
+ user_id,
+ digest_algo,
+ digest
+ )
+ VALUES
+ (
+ %s,
+ %s,
+ %s,
+ %s,
+ %s,
+ %s
)
+ RETURNING *""",
+ filename,
+ size,
+ builder,
+ user,
+ digest_algo,
+ digest,
+ )
# Return the newly created upload object
return upload
def size(self):
return self.data.size
+ @property
+ def digest_algo(self):
+ return self.data.digest_algo
+
+ @property
+ def digest(self):
+ return self.data.digest
+
# Builder
def get_builder(self):
def expires_at(self):
return self.data.expires_at
- @lazy_property
- def _filesize(self):
- return os.path.getsize(self.path)
-
- async def check_digest(self, algo, expected_digest):
+ async def copyfrom(self, src):
"""
- Checks if the upload matches an expected digest
+ Copies the content of this upload from the source file descriptor
"""
- # Compute the digest
- computed_digest = await self.digest(algo)
+ # Reset the source file handle
+ src.seek(0)
- # Compare the digests
- if hmac.compare_digest(computed_digest, expected_digest):
- return True
+ # Setup the hash function
+ h = hashlib.new(self.digest_algo)
- # The digests didn't match
- log.error("Upload does not match its digest")
+ async with self.backend.tempfile(delete=False) as f:
+ try:
+ while True:
+ buffer = src.read(1024 ** 2)
+ if not buffer:
+ break
- return False
+ # Put the buffer into the hash function
+ h.update(buffer)
- async def digest(self, algo):
- """
- Computes the digest of this download
- """
- log.debug("Computing '%s' digest for %s" % (algo, self))
+ # Write the buffer to disk
+ await f.write(buffer)
- return await asyncio.to_thread(self._digest, algo)
+ # How many bytes did we write?
+ received_size = await f.tell()
- def _digest(self, algo):
- # Initialize the hash function
- h = hashlib.new(algo)
+ # Get the digest
+ computed_digest = h.digest()
- # Read the entire file and pass it to the hash function
- with open(self.path, "rb") as f:
- while True:
- buf = f.read(65536)
- if not buf:
- break
+ # Check if the filesize matches
+ if not received_size == self.size:
+ raise ValueError("File size differs")
- h.update(buf)
+ # Check that the digest matches
+ if not hmac.compare_digest(computed_digest, self.digest):
+ raise ValueError("Invalid digest")
- # Return the digest
- return h.digest()
+ # If there has been any kind of exception, we want to delete the temporary file
+ except Exception as e:
+ await self.backend.unlink(f.name)
- async def copyfrom(self, src):
- """
- Copies the content of this upload from the source file descriptor
- """
- return await asyncio.to_thread(self._copyfrom, src)
-
- def _copyfrom(self, src):
- # Open the destination file and copy all source data into it
- with open(self.path, "wb") as dst:
- shutil.copyfileobj(src, dst)
+ raise e
- # Check that we didn't copy too much
- if dst.tell() > self.size:
- raise OverflowError
+ # Store the path
+ self._set_attribute("path", f.name)
async def copyinto(self, dst):
"""
user_id integer,
builder_id integer,
filename text NOT NULL,
- path text NOT NULL,
+ path text,
size bigint NOT NULL,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
- expires_at timestamp without time zone DEFAULT (CURRENT_TIMESTAMP + '24:00:00'::interval) NOT NULL
+ expires_at timestamp without time zone DEFAULT (CURRENT_TIMESTAMP + '24:00:00'::interval) NOT NULL,
+ digest_algo text NOT NULL,
+ digest bytea NOT NULL
);
CREATE UNIQUE INDEX sources_slug ON public.sources USING btree (slug) WHERE (deleted_at IS NULL);
+--
+-- Name: uploads_builder_id; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX uploads_builder_id ON public.uploads USING btree (builder_id);
+
+
+--
+-- Name: uploads_user_id; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX uploads_user_id ON public.uploads USING btree (user_id);
+
+
--
-- Name: uploads_uuid; Type: INDEX; Schema: public; Owner: -
--
import tornado.web
from . import base
+from .. import uploads
from .. import users
-@tornado.web.stream_request_body
class APIv1IndexHandler(base.APIMixin, tornado.web.RequestHandler):
# Allow users to perform uploads
allow_users = True
- def initialize(self):
- # Buffer to cache the uploaded content
- self.buffer = io.BytesIO()
-
- def data_received(self, data):
- """
- Called when some data is being received
- """
- self.buffer.write(data)
-
- @tornado.web.authenticated
+ @base.negotiate
def get(self):
uploads = []
"uploads" : uploads,
})
- @tornado.web.authenticated
- async def put(self):
+ @base.negotiate
+ async def post(self):
"""
- Called after the entire file has been received
+ Creates a new upload and returns its UUID
"""
# Fetch the filename
filename = self.get_argument("filename")
# Fetch file size
size = self.get_argument_int("size")
- # Fetch the digest argument
- algo, delim, hexdigest = self.get_argument("digest").partition(":")
+ # Fetch the digest algorithm
+ digest_algo = self.get_argument("hexdigest_algo")
- # Convert hexdigest
- digest = bytes.fromhex(hexdigest)
+ # Fetch the digest
+ hexdigest = self.get_argument("hexdigest")
- # Move to the beginning of the buffer
- self.buffer.seek(0)
+ # Convert hexdigest
+ try:
+ digest = bytes.fromhex(hexdigest)
+ except ValueError as e:
+ raise tornado.web.HTTPError(400, "Invalid hexdigest") from e
# Create a new upload
with self.db.transaction():
filename,
size=size,
owner=self.current_user,
+ digest_algo=digest_algo,
+ digest=digest,
)
+ except uploads.UnsupportedDigestException as e:
+ raise tornado.web.HTTPError(400,
+ "Unsupported digest %s" % digest_algo) from e
+
except users.QuotaExceededError as e:
raise tornado.web.HTTPError(400,
"Quota exceeded for %s" % self.current_user) from e
- # Import the payload from the buffer
- await upload.copyfrom(self.buffer)
-
- # Check the digest
- if not await upload.check_digest(algo, digest):
- # 422 - Unprocessable Entity
- raise tornado.web.HTTPError(422, "Digest did not match")
+ except ValueError as e:
+ raise tornado.web.HTTPError(400, "%s" % e) from e
# Send the ID of the upload back to the client
self.finish({
})
+@tornado.web.stream_request_body
class APIv1DetailHandler(base.APIMixin, tornado.web.RequestHandler):
# Allow users to perform uploads
allow_users = True
- @tornado.web.authenticated
+ def initialize(self):
+ # Buffer to cache the uploaded content
+ self.buffer = io.BytesIO()
+
+ def data_received(self, data):
+ """
+ Called when some data is being received
+ """
+ self.buffer.write(data)
+
+    # Yes, this handler deliberately does NOT require authentication.
+    # This is because we use SPNEGO, which may cause a request to be sent
+    # more than once — and therefore the payload to be transferred more
+    # than once as well.
+    # To make this safe, we require the digest when the upload is created
+    # and generate a unique ID: an attacker would have to guess that ID
+    # first, and then upload a file whose hash collides with the original.
+ async def put(self, uuid):
+ """
+ Called to store the received payload
+ """
+ # Fetch the upload
+ upload = self.backend.uploads.get_by_uuid(uuid)
+ if not upload:
+ raise tornado.web.HTTPError(404, "Could not find upload %s" % uuid)
+
+ # Import the payload from the buffer
+ with self.db.transaction():
+ try:
+ await upload.copyfrom(self.buffer)
+
+ except ValueError as e:
+ raise tornado.web.HTTPError(400, "%s" % e) from e
+
+ @base.negotiate
async def delete(self, uuid):
"""
Deletes an upload with a certain UUID