From: Francisco Manuel Garcia Botella Date: Mon, 14 Oct 2024 07:16:22 +0000 (+0200) Subject: k8s: Add pvc clean up from old backup jobs X-Git-Tag: Release-15.0.3~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dc96e63a81c5e3c508ba9093732bdfb29dd4c0d6;p=thirdparty%2Fbacula.git k8s: Add pvc clean up from old backup jobs --- diff --git a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/backup_job.py b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/backup_job.py index 6807d2fa9..5b6a0dd0f 100644 --- a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/backup_job.py +++ b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/backup_job.py @@ -184,8 +184,8 @@ class BackupJob(EstimationJob): if not backup_with_pod and not retry_backup and not self.backup_clone_compatibility: # It is important set to True before recall the process. self.backup_clone_compatibility = True # We only try once - self._io.send_info(RETRY_BACKUP_WITH_STANDARD_MODE) + logging.debug("Sent info to joblog: " + RETRY_BACKUP_WITH_STANDARD_MODE) status = self.process_pvcdata(namespace, orig_pvcdata, backup_with_pod, True) return status diff --git a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/job_pod_bacula.py b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/job_pod_bacula.py index db2455e19..352dba5d9 100644 --- a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/job_pod_bacula.py +++ b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/jobs/job_pod_bacula.py @@ -31,7 +31,7 @@ from baculak8s.plugins.k8sbackend.baculabackup import (BACULABACKUPIMAGE, ImagePullPolicy, prepare_backup_pod_yaml, get_backup_pod_name) -from baculak8s.plugins.k8sbackend.pvcclone import prepare_backup_clone_yaml +from baculak8s.plugins.k8sbackend.pvcclone import prepare_backup_clone_yaml, find_bacula_pvc_clones_from_old_job from baculak8s.plugins.k8sbackend.baculaannotations import BaculaBackupMode from baculak8s.util.respbody import parse_json_descr from baculak8s.util.sslserver import DEFAULTTIMEOUT, ConnectionServer @@ -202,7 +202,7 @@ class JobPodBacula(Job, metaclass=ABCMeta): if namespace is None or pvcname is None or capacity is None or storage_class is None: logging.error("Invalid params to pvc clone!") return None, None - pvcyaml, snapname = prepare_backup_clone_yaml(namespace, pvcname, capacity, storage_class) + pvcyaml, snapname = prepare_backup_clone_yaml(namespace, pvcname, capacity, storage_class, self.jobname) self._io.send_info(PVCCLONE_YAML_PREPARED_INFO.format( namespace=namespace, snapname=snapname, @@ -244,22 +244,12 @@ class JobPodBacula(Job, metaclass=ABCMeta): prev_bacula_pod_name = self._plugin.check_bacula_pod(namespace, self.jobname) if prev_bacula_pod_name: logging.debug('Exist previous bacula-backup pod! Name: {}'.format(prev_bacula_pod_name)) - # TODO: Check if we remove or not the bacula-backup pod - self._io.send_info('Exist a previous bacula-backup pod. Name: {}'.format(prev_bacula_pod_name)) - response = False - for a in range(self.timeout): - time.sleep(1) - response = self._plugin.check_gone_backup_pod(namespace, prev_bacula_pod_name) - if isinstance(response, dict) and 'error' in response: - self._handle_error(CANNOT_REMOVE_BACKUP_POD_ERR.format(parse_json_descr(response))) - return False - else: - if response: - break - if not response: - self._handle_error(POD_EXIST_ERR.format(namespace=namespace, podname=prev_bacula_pod_name)) + self._io.send_info('Exist a previous bacula-backup pod: {}'.format(prev_bacula_pod_name)) + response = self._plugin.remove_backup_pod(namespace, prev_bacula_pod_name) + if isinstance(response, dict) and 'error' in response: + self._handle_error(CANNOT_REMOVE_BACKUP_POD_ERR.format(parse_json_descr(response))) return False - + self._io.send_info('Removed previous bacula-backup pod: {}'.format(prev_bacula_pod_name)) poddata = yaml.safe_load(podyaml) response = self._plugin.create_backup_pod(namespace, poddata) if isinstance(response, dict) and 'error' in response: @@ -341,9 +331,27 @@ class JobPodBacula(Job, metaclass=ABCMeta): return False return True + def cleanup_old_cloned_pvc(self, namespace): + logging.debug("Starting cleanup the old pvc from other previous job") + # List all pvc names from namespace + pvc_names = self._plugin.get_pvc_names(namespace) + # Search old cloned pvcs + old_clone_pvcs = find_bacula_pvc_clones_from_old_job(pvc_names, self.jobname) + # Remove old pvcs + for old_clone_pvc in old_clone_pvcs: + self._io.send_info(f"Exist a previous bacula-backup pvc. Name: {old_clone_pvc}") + response = self._plugin.delete_persistent_volume_claim(namespace, old_clone_pvc) + logging.debug(f"Response from pvc `{old_clone_pvc}` delete request: {response}") + if isinstance(response, dict) and "error" in response: + return response + self._io.send_info(f"Removed the previous bacula-backup pvc: {old_clone_pvc}") + logging.debug("Deleted request launched successfully") + logging.debug("Ended cleanup the old pvcs from other previous job") + def create_pvcclone(self, namespace, pvcname): clonename = None logging.debug("pvcclone for:{}/{}".format(namespace, pvcname)) + self.cleanup_old_cloned_pvc(namespace) pvcdata = self._plugin.get_pvcdata_namespaced(namespace, pvcname) if isinstance(pvcdata, dict) and 'exception' in pvcdata: self._handle_error(PVCDATA_GET_ERROR.format(parse_json_descr(pvcdata))) diff --git a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/persistentvolumeclaims.py b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/persistentvolumeclaims.py index e96e3b2d3..21a97d8c1 100644 --- a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/persistentvolumeclaims.py +++ b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/persistentvolumeclaims.py @@ -37,6 +37,12 @@ from baculak8s.plugins.k8sbackend.k8sutils import prepare_metadata def persistentvolumeclaims_read_namespaced(corev1api, namespace, name): return corev1api.read_namespaced_persistent_volume_claim(name, namespace) +def persistentvolumeclaims_namespaced_only_names(corev1api, namespace, labels=""): + pvcslist = [] + pvcs = corev1api.list_namespaced_persistent_volume_claim(namespace=namespace, watch=False, label_selector=labels) + for pvc in pvcs.items: + pvcslist.append(pvc.metadata.name) + return pvcslist def persistentvolumeclaims_namespaced_names(corev1api, namespace, labels=""): pvcslist = [] @@ -64,8 +70,7 @@ def persistentvolumeclaims_list_namespaced(corev1api, namespace, estimate=False, if (pvcdata.status.capacity is not None): pvcsize = k8s_size_to_int(pvcdata.status.capacity['storage']) pvcstotalsize += pvcsize - # logging.debug("PVCDATA-SIZE:{} {}".format(pvcdata.status.capacity['storage'], pvcsize)) - # logging.debug("PVCDATA-ENC:{}".format(spec)) + pvcslist['pvc-' + pvc.metadata.name] = { 'spec': spec if not estimate else None, 'fi': k8sfileinfo(objtype=K8SObjType.K8SOBJ_PVOLCLAIM, nsname=namespace, diff --git a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/pvcclone.py b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/pvcclone.py index ba3b09ef5..03ba51c2f 100644 --- a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/pvcclone.py +++ b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/k8sbackend/pvcclone.py @@ -21,11 +21,16 @@ # import os -import string +import json +import logging from baculak8s.util.token import generate_token DEFAULTCLONEYAML = os.getenv('DEFAULTCLONEYAML', "/opt/bacula/scripts/bacula-backup-clone.yaml") +PREFIX_CLONE_PVC_BACKUP_NAME = "bacula-pvcclone-{job_name}-" +CLONE_PVC_BACKUP_NAME=PREFIX_CLONE_PVC_BACKUP_NAME + "{job_id}" +JOB_NAME_MAX_CHARS=25 +JOB_ID_MAX_DIGITS=12 CLONETEMPLATE = """ apiVersion: v1 kind: PersistentVolumeClaim @@ -46,8 +51,23 @@ spec: storage: {pvcsize} """ +def find_bacula_pvc_clones_from_old_job(pvc_list, job): + name_for_search = PREFIX_CLONE_PVC_BACKUP_NAME.format(job_name=job.split('.')[0][:JOB_NAME_MAX_CHARS].lower()) + num_hyphen=name_for_search.count('-') + old_pvcs = [] + for pvc_name in pvc_list: + if pvc_name.startswith(name_for_search) and num_hyphen == pvc_name.count('-'): + old_pvcs.append(pvc_name) + logging.debug(f"Found old cloned pvcs: {old_pvcs}") + return old_pvcs -def prepare_backup_clone_yaml(namespace, pvcname, pvcsize, scname, clonename=None): +def get_clone_pvc_name(job): + # Get job name and id, and limit to not exceed 63 characters in pvc name + job_name = job.split('.')[0][:JOB_NAME_MAX_CHARS].lower() + job_id = job.split(':')[1][:JOB_ID_MAX_DIGITS] + return CLONE_PVC_BACKUP_NAME.format(job_name=job_name, job_id=job_id) + +def prepare_backup_clone_yaml(namespace, pvcname, pvcsize, scname, jobname, clonename=None): """ Handles PVC clone yaml preparation based on available templates Args: @@ -55,6 +75,7 @@ def prepare_backup_clone_yaml(namespace, pvcname, pvcsize, scname, clonename=Non pvcname (str): source pvc name to clone from pvcsize (str): k8s capacity of the original pvc scname (str): storage class of the original pvc + job (str): Job name to add to cloned pvc name clonename (str, optional): the cloned - destination - pvcname; if `None` then name will be assigned automatically. Defaults to None. Returns: @@ -65,7 +86,10 @@ def prepare_backup_clone_yaml(namespace, pvcname, pvcsize, scname, clonename=Non with open(DEFAULTCLONEYAML, 'r') as file: cloneyaml = file.read() if clonename is None: - validchars = tuple(string.ascii_lowercase) + tuple(string.digits) - clonename = "{pvcname}-baculaclone-{id}".format(pvcname=pvcname, id=generate_token(size=6, chars=validchars)) - + clonename = get_clone_pvc_name(jobname) return cloneyaml.format(namespace=namespace, pvcname=pvcname, pvcsize=pvcsize, clonename=clonename, storageclassname=scname), clonename + +def delete_pvcclone(corev1api, namespace, pvc_name, grace_period_seconds=0, propagation_policy='Foreground'): + return corev1api.delete_namespaced_persistent_volume_claim( + pvc_name, namespace, grace_period_seconds=grace_period_seconds, + propagation_policy=propagation_policy) diff --git a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/kubernetes_plugin.py b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/kubernetes_plugin.py index a42e4884e..2eb38a64b 100644 --- a/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/kubernetes_plugin.py +++ b/bacula/src/plugins/fd/kubernetes-backend/baculak8s/plugins/kubernetes_plugin.py @@ -45,6 +45,7 @@ from baculak8s.plugins.k8sbackend.persistentvolumes import * from baculak8s.plugins.k8sbackend.pods import * from baculak8s.plugins.k8sbackend.podtemplates import * from baculak8s.plugins.k8sbackend.pvcdata import * +from baculak8s.plugins.k8sbackend.pvcclone import delete_pvcclone from baculak8s.plugins.k8sbackend.replicaset import * from baculak8s.plugins.k8sbackend.replicationcontroller import * from baculak8s.plugins.k8sbackend.resourcequota import * @@ -371,6 +372,9 @@ class KubernetesPlugin(Plugin): else: return response # it should be a dictionary with an error + def get_pvc_names(self, namespace): + return self.__execute(lambda: persistentvolumeclaims_namespaced_only_names(self.corev1api, namespace)) + def get_podtemplates(self, namespace, estimate=False): return self.__execute(lambda: podtemplates_list_namespaced(self.corev1api, namespace, estimate, self.config['labels'])) @@ -700,6 +704,11 @@ class KubernetesPlugin(Plugin): lambda: self.corev1api.read_namespaced_service_account(k8sfile2objname(file_info.name), file_info.namespace)) + def delete_persistent_volume_claim(self, namespace, pvc_name, grace_period_seconds=0, propagation_policy="Foreground"): + return self.__execute(lambda: delete_pvcclone(self.corev1api, + namespace, pvc_name, grace_period_seconds=grace_period_seconds, + propagation_policy=propagation_policy)) + def check_file(self, file_info): """ @@ -1018,11 +1027,9 @@ class KubernetesPlugin(Plugin): return {} def remove_pvcclone(self, namespace, clonename): - logging.debug('remove_pvcclone `{}`'.format(clonename)) - response = self.__execute(lambda: self.corev1api.delete_namespaced_persistent_volume_claim( - clonename, namespace, grace_period_seconds=0, - propagation_policy='Foreground')) - logging.debug("Response: {}".format(response)) + logging.debug('Remove pvcclone `{}`'.format(clonename)) + response = self.delete_persistent_volume_claim(namespace, clonename) + logging.debug("Response from delete pvc: {}".format(response)) if isinstance(response, dict) and "error" in response: return response r1 = self.get_pvcdata_namespaced(namespace, clonename)