From 478885bc1168350ca2ebcaa220ed7a0f424efecc Mon Sep 17 00:00:00 2001
From: "norbert.bizet"
Date: Thu, 9 Nov 2023 13:24:42 -0500
Subject: [PATCH] cloud: test comparing upload to AWS with 4 different methods, including bacula post-upload

---
 regress/tests/aws-upload-time-compare | 244 ++++++++++++++++++++++++++
 1 file changed, 244 insertions(+)
 create mode 100755 regress/tests/aws-upload-time-compare

diff --git a/regress/tests/aws-upload-time-compare b/regress/tests/aws-upload-time-compare
new file mode 100755
index 000000000..38293efbe
--- /dev/null
+++ b/regress/tests/aws-upload-time-compare
@@ -0,0 +1,244 @@
+#!/bin/sh
+#
+# Copyright (C) 2000-2021 Kern Sibbald
+# Copyright (C) 2021-2023 Bacula Systems SA
+# License: BSD 2-Clause; see file LICENSE-FOSS
+#
+# Not a regression test, rather a comparison of different upload strategies to the AWS cloud
+#
+TestName="aws-upload-time-compare"
+JobName=NightlySave
+. scripts/functions
+
+require_cloud
+
+# config is required for cloud cleanup
+scripts/copy-test-confs
+scripts/cleanup
+
+FORCE_FILE_SET=${FORCE_FILE_SET:-"${cwd}/build"}
+echo "$FORCE_FILE_SET" >${cwd}/tmp/file-list
+
+NUM_TEST_PARTS=10
+
+start_test
+
+$bperl -e 'add_attribute("$conf/bacula-sd.conf", "MaximumPartSize", "10000000", "Device")'
+$bperl -e 'add_attribute("$conf/bacula-sd.conf", "MaximumConcurrentUploads", "10", "Cloud")'
+$bperl -e 'add_attribute("$conf/bacula-sd.conf", "TruncateCache", "No", "Cloud")'
+$bperl -e 'add_attribute("$conf/bacula-sd.conf", "Upload", "Manual", "Cloud")'
+
+cat <<END_OF_DATA >${cwd}/tmp/bconcmds
+@output /dev/null
+messages
+@$out ${cwd}/tmp/log1.out
+label storage=File volume=Vol1
+END_OF_DATA
+
+# do label
+run_bacula
+
+cat <<END_OF_DATA >${cwd}/tmp/bconcmds
+@output /dev/null
+messages
+@$out ${cwd}/tmp/log1.out
+@#setdebug level=500 storage
+run job=$JobName level=Full yes
+wait
+messages
+END_OF_DATA
+
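+# Four upload strategies are timed against the same set of generated parts:
+#   Method 1: one "aws s3 cp" process per part, launched in parallel from a shell script
+#   Method 2: the same aws cli calls driven from a python multiprocessing Pool
+#   Method 3: a single recursive "aws s3 cp --recursive" on the whole cache directory
+#   Method 4: bacula's own "cloud upload" command run through bconsole
+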
+# run a backup to Vol1. It is only a placeholder; its parts are replaced below by generated test parts
+run_bconsole
+ls -l ${cwd}/tmp/Vol1
+
+# use the cloud configuration information to set up the aws cli
+region=$($bperl -e "get_attribute('$conf/bacula-sd.conf', 'Cloud', '$CLOUD_NAME', 'Region')")
+export AWS_DEFAULT_REGION=$region
+access_key=$($bperl -e "get_attribute('$conf/bacula-sd.conf', 'Cloud', '$CLOUD_NAME', 'AccessKey')")
+export AWS_ACCESS_KEY_ID=$access_key
+secret_key=$($bperl -e "get_attribute('$conf/bacula-sd.conf', 'Cloud', '$CLOUD_NAME', 'SecretKey')")
+export AWS_SECRET_ACCESS_KEY=$secret_key
+BucketName=$($bperl -e "get_attribute('$conf/bacula-sd.conf', 'Cloud', '$CLOUD_NAME', 'BucketName')")
+path_to_parts=${cwd}/tmp/Vol1
+
+# generate fake random parts; $1 is the part size in MB
+generate_cache()
+{
+i=1
+while [ "$i" -le $NUM_TEST_PARTS ]; do
+  dd if=/dev/random of=$path_to_parts/part.$i bs=${1}M count=1
+  i=$(( i + 1 ))
+done
+}
+
+# upload a single part (arg1 = part index) with python calling the aws cli
+cat <<END_OF_DATA >${cwd}/tmp/aws_python.py
+import sys
+from subprocess import Popen, PIPE
+part=sys.argv[1]
+objects_default_tier = "STANDARD"
+cmd = ["aws", "s3", "cp", "$path_to_parts/part.%s"%part, "s3://$BucketName/Vol1/part.%s"%part, "--storage-class", objects_default_tier, "--only-show-errors"]
+proc = Popen( cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
+output,err = proc.communicate()
+if output:
+    print(output)
+if err:
+    print(err)
+END_OF_DATA
+
+# Method 1: 10 processes running the above upload script in parallel
+cat <<END_OF_DATA >${cwd}/tmp/aws_python_comb
+#!/bin/bash
+for (( i=1; i<=$NUM_TEST_PARTS; i++ ))
+do
+python3 ${cwd}/tmp/aws_python.py \$i &
+done
+wait
+END_OF_DATA
+
+chmod 755 ${cwd}/tmp/aws_python_comb
+
+NUM_TEST_PARTS_PLUS_ONE=$((NUM_TEST_PARTS + 1))
+# Method 2: upload the parts in parallel directly in python (multiprocessing Pool)
+cat <<END_OF_DATA >${cwd}/tmp/aws_comb.py
+import sys
+from subprocess import Popen, PIPE
+from multiprocessing import Pool
+
+def upload(part):
+    objects_default_tier = "STANDARD"
+    cmd = ["aws", "s3", "cp", "$path_to_parts/part.%s"%part, "s3://$BucketName/Vol1/part.%s"%part, "--storage-class", objects_default_tier, "--only-show-errors"]
+    proc = Popen( cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
+    output,err = proc.communicate()
+    if output:
+        print(output)
+    if err:
+        print(err)
+if __name__ == "__main__":
+    # using multiprocessing
+    with Pool() as pool:
+        result = pool.map(upload, range(1,$NUM_TEST_PARTS_PLUS_ONE))
+END_OF_DATA
+
+# Method 3: use the aws cli recursive copy
+cat <<END_OF_DATA >${cwd}/tmp/aws_recursive.py
+import sys
+from subprocess import Popen, PIPE
+
+objects_default_tier = "STANDARD"
+cmd = ["aws", "s3", "cp", "$path_to_parts", "s3://$BucketName/Vol1", "--storage-class", objects_default_tier, "--only-show-errors", "--recursive"]
+proc = Popen( cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
+output,err = proc.communicate()
+if output:
+    print(output)
+if err:
+    print(err)
+END_OF_DATA
+
+# Method 4: use the bacula command: cloud upload volume=Vol1
+cat <<END_OF_DATA >${cwd}/tmp/bconcmds
+@$out ${cwd}/tmp/log_upload.out
+cloud upload storage=File volume=Vol1
+wait
+messages
+quit
+END_OF_DATA
+
+# set default values for the aws cli
+aws configure set default.s3.max_concurrent_requests $NUM_TEST_PARTS
+
+# remove the parts created by the backup
+rm ${cwd}/tmp/Vol1/part.*
+
+# part_size in MB
+for part_size in 2 5 24 50 100
+do
+  echo "generate parts: $part_size MB"
+  generate_cache $part_size > /dev/null 2>&1
+
+  high_multipart_threshold=$(($part_size + 1))
+  low_multipart_threshold=$(($part_size - 1))
+
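+  # default.s3.multipart_threshold is the object size above which the aws cli
+  # switches from a single PUT to a multipart upload, so a threshold just above
+  # the part size should give one request per part, while a threshold just
+  # below it should make the cli split every part into a multipart upload.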
+  # First run with multipart threshold above part-size
+  echo "multipart_threshold ${high_multipart_threshold} MB"
+  aws configure set default.s3.multipart_threshold ${high_multipart_threshold}MB
+
+  echo "Method#1 (sh comb)..."
+  start=$(date +%s)
+  ${cwd}/tmp/aws_python_comb
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#1 duration: $(($end-$start))s"
+
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+
+  echo "Method#2 (python comb)..."
+  start=$(date +%s)
+  python3 ${cwd}/tmp/aws_comb.py
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#2 duration: $(($end-$start))s"
+
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+
+  echo "Method#3 (aws cli recursive)..."
+  start=$(date +%s)
+  python3 ${cwd}/tmp/aws_recursive.py
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#3 duration: $(($end-$start))s"
+
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+
+  echo "Method#4 (bacula)..."
+  start=$(date +%s)
+  run_bconsole > /dev/null 2>&1
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#4 duration: $(($end-$start))s"
+
+  # Same with multipart threshold under part-size
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+
+  echo "multipart_threshold ${low_multipart_threshold} MB"
+  aws configure set default.s3.multipart_threshold ${low_multipart_threshold}MB
+
+  echo "Method#1 (sh comb)..."
+  start=$(date +%s)
+  ${cwd}/tmp/aws_python_comb
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#1 duration: $(($end-$start))s"
+
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+
+  echo "Method#2 (python comb)..."
+  start=$(date +%s)
+  python3 ${cwd}/tmp/aws_comb.py
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#2 duration: $(($end-$start))s"
+
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+
+  echo "Method#3 (aws cli recursive)..."
+  start=$(date +%s)
+  python3 ${cwd}/tmp/aws_recursive.py
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#3 duration: $(($end-$start))s"
+
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+
+  echo "Method#4 (bacula)..."
+  start=$(date +%s)
+  run_bconsole > /dev/null 2>&1
+  end=$(date +%s)
+  echo "...done"
+  echo "Method#4 duration: $(($end-$start))s"
+
+  aws s3 rm s3://$BucketName/Vol1 --recursive > /dev/null 2>&1
+done
+
+end_test
-- 
2.47.3