From: Michael Tremer Date: Wed, 29 Jan 2025 17:15:46 +0000 (+0000) Subject: uploads: Calculate the digest when receiving the file X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1681783decf394b41ddf09c41f18bad21d344e66;p=pbs.git uploads: Calculate the digest when receiving the file Signed-off-by: Michael Tremer --- diff --git a/src/buildservice/jobs.py b/src/buildservice/jobs.py index b02753a1..0d94f89f 100644 --- a/src/buildservice/jobs.py +++ b/src/buildservice/jobs.py @@ -1045,13 +1045,10 @@ class Job(database.Base, database.BackendMixin, database.SoftDeleteMixin): with gzip.open(path, "wb", compresslevel=9) as f: await upload.copyinto(f) - # Compute a digest for integrity - digest = await upload.digest("blake2s") - # Store everything in the database self.log_path = path - self.log_size = size - self.log_digest_blake2s = digest + self.log_size = upload.size + self.log_digest_blake2s = upload.digest_blake2b512 # Consume the upload object await upload.delete() diff --git a/src/buildservice/uploads.py b/src/buildservice/uploads.py index a5394049..fff733f7 100644 --- a/src/buildservice/uploads.py +++ b/src/buildservice/uploads.py @@ -20,13 +20,6 @@ from .constants import * # Setup logging log = logging.getLogger("pbs.uploads") -supported_digest_algos = ( - "blake2b512", -) - -class UnsupportedDigestException(ValueError): - pass - class Uploads(base.Object): def __aiter__(self): stmt = ( @@ -50,18 +43,14 @@ class Uploads(base.Object): return await self.db.fetch_one(stmt) - async def create(self, filename, size, digest_algo, digest, owner=None): + async def create(self, filename, size, digest_blake2b512, owner=None): """ Creates a new upload """ builder, user = None, None - # Check if the digest algorithm is supported - if not digest_algo in supported_digest_algos: - raise UnsupportedDigestException(digest_algo) - # Check if the digest is set - elif not digest: + if not digest_blake2b512: raise ValueError("Empty digest") # Check uploader type @@ -78,12 +67,11 
@@ class Uploads(base.Object): # Create a new upload upload = await self.db.insert( Upload, - filename = filename, - size = size, - builder = builder, - user = user, - digest_algo = digest_algo, - digest = digest, + filename = filename, + size = size, + builder = builder, + user = user, + digest_blake2b512 = digest_blake2b512, ) # Return the newly created upload object @@ -159,13 +147,9 @@ class Upload(database.Base, database.BackendMixin): size = Column(BigInteger, nullable=False) - # Digest Algo - - digest_algo = Column(Text, nullable=False) - # Digest - digest = Column(LargeBinary, nullable=False) + digest_blake2b512 = Column(LargeBinary, nullable=False) # Builder ID @@ -244,7 +228,7 @@ class Upload(database.Base, database.BackendMixin): src.seek(0) # Setup the hash function - h = hashlib.new(self.digest_algo) + h = hashlib.new("blake2b512") async with self.backend.tempfile(delete=False) as f: try: @@ -270,9 +254,9 @@ class Upload(database.Base, database.BackendMixin): raise ValueError("File size differs") # Check that the digest matches - if not hmac.compare_digest(computed_digest, self.digest): + if not hmac.compare_digest(computed_digest, self.digest_blake2b512): log.error("Upload %s had an incorrect digest:" % self) - log.error(" Expected: %s" % self.digest.hex()) + log.error(" Expected: %s" % self.digest_blake2b512.hex()) log.error(" Got : %s" % computed_digest.hex()) raise ValueError("Invalid digest") diff --git a/src/database.sql b/src/database.sql index 22556c01..bee2ec83 100644 --- a/src/database.sql +++ b/src/database.sql @@ -1067,8 +1067,7 @@ CREATE TABLE public.uploads ( size bigint NOT NULL, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, expires_at timestamp without time zone DEFAULT (CURRENT_TIMESTAMP + '24:00:00'::interval) NOT NULL, - digest_algo text NOT NULL, - digest bytea NOT NULL + digest_blake2b512 bytea NOT NULL ); diff --git a/src/web/uploads.py b/src/web/uploads.py index 91c31ee4..97d1269f 100644 --- 
a/src/web/uploads.py +++ b/src/web/uploads.py @@ -20,6 +20,8 @@ ############################################################################### import errno +import hashlib +import hmac import io import tornado.web @@ -64,15 +66,12 @@ class APIv1IndexHandler(base.APIMixin, base.BaseHandler): # Fetch file size size = self.get_argument_int("size") - # Fetch the digest algorithm - digest_algo = self.get_argument("hexdigest_algo") - # Fetch the digest - hexdigest = self.get_argument("hexdigest") + hexdigest_blake2b512 = self.get_argument("hexdigest_blake2b512") # Convert hexdigest try: - digest = bytes.fromhex(hexdigest) + digest_blake2b512 = bytes.fromhex(hexdigest_blake2b512) except ValueError as e: raise tornado.web.HTTPError(400, "Invalid hexdigest") from e @@ -80,16 +79,12 @@ async with await self.db.transaction(): try: upload = await self.backend.uploads.create( - filename = filename, - size = size, - owner = self.current_user, - digest_algo = digest_algo, - digest = digest, + filename = filename, + size = size, + owner = self.current_user, + digest_blake2b512 = digest_blake2b512, ) - except uploads.UnsupportedDigestException as e: - raise base.APIError(errno.ENOTSUP, "Unsupported digest %s" % digest_algo) from e - except users.QuotaExceededError as e: raise base.APIError(errno.EDQUOT, "Quota exceeded for %s" % self.current_user) from e @@ -117,14 +112,27 @@ class APIv1DetailHandler(base.APIMixin, base.BaseHandler): # Allow users to perform uploads allow_users = True + # Compute a hash while receiving the file + hashes = ( + "blake2b512", + ) + def initialize(self): # Buffer to cache the uploaded content self.buffer = io.BytesIO() + # Initialize the hashers + self.hashers = { h : hashlib.new(h) for h in self.hashes } + def data_received(self, data): """ Called when some data is being received """ + # Hash the data + for h in self.hashers: + self.hashers[h].update(data) + + # Write the data to the buffer 
self.buffer.write(data) @base.negotiate @@ -163,6 +171,17 @@ class APIv1DetailHandler(base.APIMixin, base.BaseHandler): if not self.buffer.tell(): raise base.APIError(errno.ENODATA, "No data received") + # Finalize digests of received data + digests = { + h : self.hashers[h].digest() for h in self.hashers + } + + # Check if digests match + for algo, digest in digests.items(): + if algo == "blake2b512": + if not hmac.compare_digest(upload.digest_blake2b512, digest): + raise tornado.web.HTTPError(409, "%s digest does not match" % algo) + # Import the payload from the buffer async with await self.db.transaction(): try: