git.ipfire.org Git - pbs.git/commitdiff
uploads: Rewrite the whole thing
author Michael Tremer <michael.tremer@ipfire.org>
Thu, 19 Oct 2023 16:03:55 +0000 (16:03 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Thu, 19 Oct 2023 16:03:55 +0000 (16:03 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/buildservice/uploads.py
src/database.sql
src/web/uploads.py
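The rewrite splits uploading into two requests: a POST to the index handler creates the upload record from the file name, size and announced digest, and a separate PUT to the detail handler streams the payload, which the server verifies against that digest before keeping it. A minimal client sketch of the new flow, assuming hypothetical routes (/api/v1/uploads and /api/v1/uploads/<uuid>), an already authenticated requests session and an "id" key in the JSON response; none of these are spelled out in this diff:

# Sketch of the new two-step upload flow. The routes, the authenticated
# session and the "id" response key are assumptions, not part of this commit.
import hashlib
import os

import requests

def upload_file(session, base_url, path):
    size = os.path.getsize(path)

    # Compute the digest up front; it is announced when the upload is created
    h = hashlib.new("blake2b512")
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(1024 ** 2), b""):
            h.update(block)

    # Step 1: Create the upload and receive its UUID
    res = session.post("%s/api/v1/uploads" % base_url, params={
        "filename"       : os.path.basename(path),
        "size"           : size,
        "hexdigest_algo" : "blake2b512",
        "hexdigest"      : h.hexdigest(),
    })
    res.raise_for_status()

    # The handler sends the ID of the upload back to the client
    upload_id = res.json()["id"]

    # Step 2: Send the payload; the server recomputes the digest and rejects
    # the upload if the size or the digest do not match
    with open(path, "rb") as f:
        res = session.put("%s/api/v1/uploads/%s" % (base_url, upload_id), data=f)
        res.raise_for_status()

    return upload_id

# Usage (authentication setup depends on the deployment and is omitted):
#   session = requests.Session()
#   upload_file(session, "https://pbs.example.org", "some-package.file")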

index 398d801c891eac5f05df0a6d7c064622ce101b2e..25eb91199ef6efa55ae7620bff000a098290c3c8 100644 (file)
--- a/src/buildservice/uploads.py
+++ b/src/buildservice/uploads.py
@@ -18,6 +18,13 @@ log = logging.getLogger("pbs.uploads")
 
 MAX_BUFFER_SIZE = 1 * 1024 * 1024 # 1 MiB
 
+supported_digest_algos = (
+       "blake2b512",
+)
+
+class UnsupportedDigestException(ValueError):
+       pass
+
 class Uploads(base.Object):
        def _get_upload(self, query, *args):
                res = self.db.get(query, *args)
@@ -50,10 +57,18 @@ class Uploads(base.Object):
                        """, uuid,
                )
 
-       async def create(self, filename, size, owner=None):
+       async def create(self, filename, size, digest_algo, digest, owner=None):
                builder = None
                user = None
 
+               # Check if the digest algorithm is supported
+               if not digest_algo in supported_digest_algos:
+                       raise UnsupportedDigestException(digest_algo)
+
+               # Check if the digest is set
+               elif not digest:
+                       raise ValueError("Empty digest")
+
                # Check uploader type
                if isinstance(owner, builders.Builder):
                        builder = owner
@@ -66,32 +81,34 @@ class Uploads(base.Object):
                        user.check_storage_quota(size)
 
                # Allocate a new temporary file
-               async with self.backend.tempfile(delete=False) as f:
-                       upload = self._get_upload("""
-                               INSERT INTO
-                                       uploads
-                               (
-                                       filename,
-                                       path,
-                                       size,
-                                       builder_id,
-                                       user_id
-                               )
-                               VALUES
-                               (
-                                       %s,
-                                       %s,
-                                       %s,
-                                       %s,
-                                       %s
-                               )
-                               RETURNING *""",
+               upload = self._get_upload("""
+                       INSERT INTO
+                               uploads
+                       (
                                filename,
-                               f.name,
                                size,
-                               builder,
-                               user,
+                               builder_id,
+                               user_id,
+                               digest_algo,
+                               digest
+                       )
+                       VALUES
+                       (
+                               %s,
+                               %s,
+                               %s,
+                               %s,
+                               %s,
+                               %s
                        )
+                       RETURNING *""",
+                       filename,
+                       size,
+                       builder,
+                       user,
+                       digest_algo,
+                       digest,
+               )
 
                # Return the newly created upload object
                return upload
@@ -157,6 +174,14 @@ class Upload(base.DataObject):
        def size(self):
                return self.data.size
 
+       @property
+       def digest_algo(self):
+               return self.data.digest_algo
+
+       @property
+       def digest(self):
+               return self.data.digest
+
        # Builder
 
        def get_builder(self):
@@ -206,64 +231,51 @@ class Upload(base.DataObject):
        def expires_at(self):
                return self.data.expires_at
 
-       @lazy_property
-       def _filesize(self):
-               return os.path.getsize(self.path)
-
-       async def check_digest(self, algo, expected_digest):
+       async def copyfrom(self, src):
                """
-                       Checks if the upload matches an expected digest
+                       Copies the content of this upload from the source file descriptor
                """
-               # Compute the digest
-               computed_digest = await self.digest(algo)
+               # Reset the source file handle
+               src.seek(0)
 
-               # Compare the digests
-               if hmac.compare_digest(computed_digest, expected_digest):
-                       return True
+               # Setup the hash function
+               h = hashlib.new(self.digest_algo)
 
-               # The digests didn't match
-               log.error("Upload does not match its digest")
+               async with self.backend.tempfile(delete=False) as f:
+                       try:
+                               while True:
+                                       buffer = src.read(1024 ** 2)
+                                       if not buffer:
+                                               break
 
-               return False
+                                       # Put the buffer into the hash function
+                                       h.update(buffer)
 
-       async def digest(self, algo):
-               """
-                       Computes the digest of this download
-               """
-               log.debug("Computing '%s' digest for %s" % (algo, self))
+                                       # Write the buffer to disk
+                                       await f.write(buffer)
 
-               return await asyncio.to_thread(self._digest, algo)
+                               # How many bytes did we write?
+                               received_size = await f.tell()
 
-       def _digest(self, algo):
-               # Initialize the hash function
-               h = hashlib.new(algo)
+                               # Get the digest
+                               computed_digest = h.digest()
 
-               # Read the entire file and pass it to the hash function
-               with open(self.path, "rb") as f:
-                       while True:
-                               buf = f.read(65536)
-                               if not buf:
-                                       break
+                               # Check if the filesize matches
+                               if not received_size == self.size:
+                                       raise ValueError("File size differs")
 
-                               h.update(buf)
+                               # Check that the digest matches
+                               if not hmac.compare_digest(computed_digest, self.digest):
+                                       raise ValueError("Invalid digest")
 
-               # Return the digest
-               return h.digest()
+                       # If there has been any kind of exception, we want to delete the temporary file
+                       except Exception as e:
+                               await self.backend.unlink(f.name)
 
-       async def copyfrom(self, src):
-               """
-                       Copies the content of this upload from the source file descriptor
-               """
-               return await asyncio.to_thread(self._copyfrom, src)
-
-       def _copyfrom(self, src):
-               # Open the destination file and copy all source data into it
-               with open(self.path, "wb") as dst:
-                       shutil.copyfileobj(src, dst)
+                               raise e
 
-                       # Check that we didn't copy too much
-                       if dst.tell() > self.size:
-                               raise OverflowError
+               # Store the path
+               self._set_attribute("path", f.name)
 
        async def copyinto(self, dst):
                """
index ba33e806909190851af74b8e2fb40077ea992765..87a6791d7ce221b9b5f9cdc05da57e74b0877a26 100644 (file)
--- a/src/database.sql
+++ b/src/database.sql
@@ -1047,10 +1047,12 @@ CREATE TABLE public.uploads (
     user_id integer,
     builder_id integer,
     filename text NOT NULL,
-    path text NOT NULL,
+    path text,
     size bigint NOT NULL,
     created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
-    expires_at timestamp without time zone DEFAULT (CURRENT_TIMESTAMP + '24:00:00'::interval) NOT NULL
+    expires_at timestamp without time zone DEFAULT (CURRENT_TIMESTAMP + '24:00:00'::interval) NOT NULL,
+    digest_algo text NOT NULL,
+    digest bytea NOT NULL
 );
 
 
@@ -1873,6 +1875,20 @@ CREATE INDEX sources_repo_id ON public.sources USING btree (repo_id) WHERE (dele
 CREATE UNIQUE INDEX sources_slug ON public.sources USING btree (slug) WHERE (deleted_at IS NULL);
 
 
+--
+-- Name: uploads_builder_id; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX uploads_builder_id ON public.uploads USING btree (builder_id);
+
+
+--
+-- Name: uploads_user_id; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX uploads_user_id ON public.uploads USING btree (user_id);
+
+
 --
 -- Name: uploads_uuid; Type: INDEX; Schema: public; Owner: -
 --
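The new Upload.copyfrom() above streams the payload into a temporary file while hashing it, and only keeps the file if the received size and digest match the values stored in the digest_algo and digest columns when the upload was created. The same verification pattern as a standalone synchronous sketch, with the async tempfile backend from the diff replaced by plain standard-library I/O:

# Standalone sketch of the verification logic in Upload.copyfrom(). The async
# tempfile backend from the diff is replaced here by plain file I/O.
import hashlib
import hmac
import os
import tempfile

def copy_and_verify(src, expected_size, digest_algo, expected_digest):
    # Reset the source file handle
    src.seek(0)

    # Hash with the algorithm that was announced when the upload was created
    h = hashlib.new(digest_algo)

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "wb") as f:
            while True:
                buffer = src.read(1024 ** 2)
                if not buffer:
                    break

                # Feed the buffer into the hash function and write it to disk
                h.update(buffer)
                f.write(buffer)

            # How many bytes did we write?
            received_size = f.tell()

        # Reject the upload if it is not exactly as large as announced...
        if received_size != expected_size:
            raise ValueError("File size differs")

        # ...or if its digest does not match the announced one
        if not hmac.compare_digest(h.digest(), expected_digest):
            raise ValueError("Invalid digest")

    except Exception:
        # Never keep a partially written or mismatching temporary file around
        os.unlink(path)
        raise

    return path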
index 69629cb3813bdcdefa1375d9b9e9d9b449a3f260..9172473b2af4e6881d2cbe151be0f1c370e4c4f6 100644 (file)
--- a/src/web/uploads.py
+++ b/src/web/uploads.py
@@ -23,24 +23,14 @@ import io
 import tornado.web
 
 from . import base
+from .. import uploads
 from .. import users
 
-@tornado.web.stream_request_body
 class APIv1IndexHandler(base.APIMixin, tornado.web.RequestHandler):
        # Allow users to perform uploads
        allow_users = True
 
-       def initialize(self):
-               # Buffer to cache the uploaded content
-               self.buffer = io.BytesIO()
-
-       def data_received(self, data):
-               """
-                       Called when some data is being received
-               """
-               self.buffer.write(data)
-
-       @tornado.web.authenticated
+       @base.negotiate
        def get(self):
                uploads = []
 
@@ -59,10 +49,10 @@ class APIv1IndexHandler(base.APIMixin, tornado.web.RequestHandler):
                        "uploads" : uploads,
                })
 
-       @tornado.web.authenticated
-       async def put(self):
+       @base.negotiate
+       async def post(self):
                """
-                       Called after the entire file has been received
+                       Creates a new upload and returns its UUID
                """
                # Fetch the filename
                filename = self.get_argument("filename")
@@ -70,14 +60,17 @@ class APIv1IndexHandler(base.APIMixin, tornado.web.RequestHandler):
                # Fetch file size
                size = self.get_argument_int("size")
 
-               # Fetch the digest argument
-               algo, delim, hexdigest = self.get_argument("digest").partition(":")
+               # Fetch the digest algorithm
+               digest_algo = self.get_argument("hexdigest_algo")
 
-               # Convert hexdigest
-               digest = bytes.fromhex(hexdigest)
+               # Fetch the digest
+               hexdigest = self.get_argument("hexdigest")
 
-               # Move to the beginning of the buffer
-               self.buffer.seek(0)
+               # Convert hexdigest
+               try:
+                       digest = bytes.fromhex(hexdigest)
+               except ValueError as e:
+                       raise tornado.web.HTTPError(400, "Invalid hexdigest") from e
 
                # Create a new upload
                with self.db.transaction():
@@ -86,19 +79,20 @@ class APIv1IndexHandler(base.APIMixin, tornado.web.RequestHandler):
                                        filename,
                                        size=size,
                                        owner=self.current_user,
+                                       digest_algo=digest_algo,
+                                       digest=digest,
                                )
 
+                       except uploads.UnsupportedDigestException as e:
+                               raise tornado.web.HTTPError(400,
+                                       "Unsupported digest %s" % digest_algo) from e
+
                        except users.QuotaExceededError as e:
                                raise tornado.web.HTTPError(400,
                                        "Quota exceeded for %s" % self.current_user) from e
 
-                       # Import the payload from the buffer
-                       await upload.copyfrom(self.buffer)
-
-                       # Check the digest
-                       if not await upload.check_digest(algo, digest):
-                               # 422 - Unprocessable Entity
-                               raise tornado.web.HTTPError(422, "Digest did not match")
+                       except ValueError as e:
+                               raise tornado.web.HTTPError(400, "%s" % e) from e
 
                # Send the ID of the upload back to the client
                self.finish({
@@ -107,11 +101,46 @@ class APIv1IndexHandler(base.APIMixin, tornado.web.RequestHandler):
                })
 
 
+@tornado.web.stream_request_body
 class APIv1DetailHandler(base.APIMixin, tornado.web.RequestHandler):
        # Allow users to perform uploads
        allow_users = True
 
-       @tornado.web.authenticated
+       def initialize(self):
+               # Buffer to cache the uploaded content
+               self.buffer = io.BytesIO()
+
+       def data_received(self, data):
+               """
+                       Called when some data is being received
+               """
+               self.buffer.write(data)
+
+       # Yes, this does not require authentication. You have seen this correctly.
+       # This is because we use SPNEGO, which might cause a request to be sent
+       # more than once, and therefore the payload to be transferred more than
+       # once.
+       # To avoid this, we request the digest when the upload is being created and
+       # generate a unique ID which an attacker would have to guess first; they would
+       # then have to upload a file whose hash collides with the original file.
+       async def put(self, uuid):
+               """
+                       Called to store the received payload
+               """
+               # Fetch the upload
+               upload = self.backend.uploads.get_by_uuid(uuid)
+               if not upload:
+                       raise tornado.web.HTTPError(404, "Could not find upload %s" % uuid)
+
+               # Import the payload from the buffer
+               with self.db.transaction():
+                       try:
+                               await upload.copyfrom(self.buffer)
+
+                       except ValueError as e:
+                               raise tornado.web.HTTPError(400, "%s" % e) from e
+
+       @base.negotiate
        async def delete(self, uuid):
                """
                        Deletes an upload with a certain UUID
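For reference, the body handling in APIv1DetailHandler follows Tornado's stream_request_body pattern: chunks arrive in data_received() and are buffered until put() runs with the complete payload. A minimal self-contained sketch of that pattern; the handler name, route and response fields here are illustrative and not taken from this commit:

# Minimal sketch of the streaming pattern used by APIv1DetailHandler.
# The handler name, route and response fields are illustrative only.
import io

import tornado.web

@tornado.web.stream_request_body
class PayloadHandler(tornado.web.RequestHandler):
    def initialize(self):
        # Buffer to cache the uploaded content
        self.buffer = io.BytesIO()

    def data_received(self, data):
        # Called by Tornado for every chunk of the request body
        self.buffer.write(data)

    async def put(self, uuid):
        # The entire body has been received once this runs
        self.buffer.seek(0)
        payload = self.buffer.read()

        # A real handler would now hand the buffer to Upload.copyfrom()
        self.finish({
            "uuid" : uuid,
            "size" : len(payload),
        })

def make_app():
    return tornado.web.Application([
        (r"/api/v1/uploads/([0-9a-f\-]+)", PayloadHandler),
    ])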