]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Changes the cleanup images workflow so it uses a OAuth token with the correct scope...
authorTrenton Holmes <holmes.trenton@gmail.com>
Tue, 26 Jul 2022 22:41:57 +0000 (15:41 -0700)
committerTrenton Holmes <holmes.trenton@gmail.com>
Wed, 3 Aug 2022 17:19:15 +0000 (10:19 -0700)
.github/scripts/cleanup-tags.py
.github/workflows/cleanup-tags.yml

index 592bd9e9fcbbf90fa501dfe701ab90a1e6d94858..ea8e69b6a2b047ff8f1b744e0bc9394e6645d48d 100644 (file)
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 import logging
 import os
 from argparse import ArgumentParser
@@ -35,6 +36,10 @@ class GithubContainerRegistry:
             self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
 
     def __enter__(self):
+        """
+        Sets up the required headers for auth and response
+        type from the API
+        """
         self._session.headers.update(
             {
                 "Accept": "application/vnd.github.v3+json",
@@ -44,12 +49,20 @@ class GithubContainerRegistry:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Ensures the authorization token is cleaned up no matter
+        the reason for the exit
+        """
         if "Accept" in self._session.headers:
             del self._session.headers["Accept"]
         if "Authorization" in self._session.headers:
             del self._session.headers["Authorization"]
 
     def _read_all_pages(self, endpoint):
+        """
+        Internal function to read all pages of an endpoint, utilizing the
+        next.url until exhausted
+        """
         internal_data = []
 
         while True:
@@ -68,11 +81,18 @@ class GithubContainerRegistry:
         return internal_data
 
     def get_branches(self, repo: str):
+        """
+        Returns all current branches of the given repository
+        """
         endpoint = self._BRANCHES_ENDPOINT.format(OWNER=self._owner_or_org, REPO=repo)
         internal_data = self._read_all_pages(endpoint)
         return internal_data
 
     def filter_branches_by_name_pattern(self, branch_data, pattern: str):
+        """
+        Filters the given list of branches to those which start with the given
+        pattern.  Future enhancement could use regex patterns instead.
+        """
         matches = {}
 
         for branch in branch_data:
@@ -86,6 +106,10 @@ class GithubContainerRegistry:
         package_name: str,
         package_type: str = "container",
     ) -> List:
+        """
+        Returns all the versions of a given package (container images) from
+        the API
+        """
         package_name = quote(package_name, safe="")
         endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format(
             ORG=self._owner_or_org,
@@ -98,6 +122,10 @@ class GithubContainerRegistry:
         return internal_data
 
     def filter_packages_by_tag_pattern(self, package_data, pattern: str):
+        """
+        Filters the given package version info to those where the tags of the image
+        containers at least 1 tag which starts with the given pattern.
+        """
         matches = {}
 
         for package in package_data:
@@ -113,6 +141,9 @@ class GithubContainerRegistry:
         return matches
 
     def filter_packages_untagged(self, package_data):
+        """
+        Filters the given package data to those which have no tags at all
+        """
         matches = {}
 
         for package in package_data:
@@ -126,13 +157,10 @@ class GithubContainerRegistry:
         return matches
 
     def delete_package_version(self, package_name, package_data):
-        package_name = quote(package_name, safe="")
-        endpoint = self._PACKAGE_VERSION_DELETE_ENDPOINT.format(
-            ORG=self._owner_or_org,
-            PACKAGE_TYPE=package_data["metadata"]["package_type"],
-            PACKAGE_NAME=package_name,
-            PACKAGE_VERSION_ID=package_data["id"],
-        )
+        """
+        Deletes the given package version from the GHCR
+        """
+        endpoint = package_data["url"]
         resp = self._session.delete(endpoint)
         if resp.status_code != 204:
             logger.warning(
@@ -146,6 +174,7 @@ def _main():
         " tags which no longer have an associated feature branch",
     )
 
+    # Requires an affirmative command to actually do a delete
     parser.add_argument(
         "--delete",
         action="store_true",
@@ -153,7 +182,8 @@ def _main():
         help="If provided, actually delete the container tags",
     )
 
-    # TODO There's a lot of untagged images, do those need to stay for anything?
+    # When a tagged image is updated, the previous version remains, but it no longer tagged
+    # Add this option to remove them as well
     parser.add_argument(
         "--untagged",
         action="store_true",
@@ -161,6 +191,7 @@ def _main():
         help="If provided, delete untagged containers as well",
     )
 
+    # Allows configuration of log level for debugging
     parser.add_argument(
         "--loglevel",
         default="info",
@@ -175,28 +206,34 @@ def _main():
         format="%(asctime)s %(levelname)-8s %(message)s",
     )
 
+    # Must be provided in the environment
     repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"]
     repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
-    gh_token: Final[str] = os.environ["GITHUB_TOKEN"]
+    gh_token: Final[str] = os.environ["TOKEN"]
 
     with requests.session() as sess:
         with GithubContainerRegistry(sess, gh_token, repo_owner) as gh_api:
+            # Step 1 - Locate all branches of the repo
             all_branches = gh_api.get_branches("paperless-ngx")
             logger.info(f"Located {len(all_branches)} branches of {repo_owner}/{repo} ")
 
+            # Step 2 - Filter branches to those starting with "feature-"
             feature_branches = gh_api.filter_branches_by_name_pattern(
                 all_branches,
                 "feature-",
             )
             logger.info(f"Located {len(feature_branches)} feature branches")
 
+            # Step 3 - Deal with package information
             for package_name in ["paperless-ngx", "paperless-ngx/builder/cache/app"]:
 
+                # Step 3.1 - Location all versions of the given package
                 all_package_versions = gh_api.get_package_versions(package_name)
                 logger.info(
                     f"Located {len(all_package_versions)} versions of package {package_name}",
                 )
 
+                # Step 3.2 - Location package versions which have a tag of "feature-"
                 packages_tagged_feature = gh_api.filter_packages_by_tag_pattern(
                     all_package_versions,
                     "feature-",
@@ -205,6 +242,8 @@ def _main():
                     f'Located {len(packages_tagged_feature)} versions of package {package_name} tagged "feature-"',
                 )
 
+                # Step 3.3 - Location package versions with no tags at all
+                # TODO: What exactly are these?  Leftovers?
                 untagged_packages = gh_api.filter_packages_untagged(
                     all_package_versions,
                 )
@@ -212,6 +251,7 @@ def _main():
                     f"Located {len(untagged_packages)} untagged versions of package {package_name}",
                 )
 
+                # Step 3.4 - Determine which package versions have no matching branch
                 to_delete = list(
                     set(packages_tagged_feature.keys()) - set(feature_branches.keys()),
                 )
@@ -219,6 +259,7 @@ def _main():
                     f"Located {len(to_delete)} versions of package {package_name} to delete",
                 )
 
+                # Step 3.5 - Delete certain package versions
                 for tag_to_delete in to_delete:
                     package_version_info = packages_tagged_feature[tag_to_delete]
 
@@ -236,16 +277,22 @@ def _main():
                             f"Would delete {tag_to_delete} (id {package_version_info['id']})",
                         )
 
+                # Step 3.6 - Delete untagged package versions
                 if args.untagged:
                     logger.info(f"Deleting untagged packages of {package_name}")
                     for to_delete_name in untagged_packages:
                         to_delete_version = untagged_packages[to_delete_name]
-                        logger.info(f"Deleting id {to_delete_version['id']}")
+
                         if args.delete:
+                            logger.info(f"Deleting id {to_delete_version['id']}")
                             gh_api.delete_package_version(
                                 package_name,
                                 to_delete_version,
                             )
+                        else:
+                            logger.info(
+                                f"Would delete {to_delete_name} (id {to_delete_version['id']})",
+                            )
                 else:
                     logger.info("Leaving untagged images untouched")
 
index 5041bf0ee458490d91934aec864f25ec3d53b013..97e0081d2b83f8e7a976cfeabeb380d71d0e61e8 100644 (file)
@@ -1,3 +1,8 @@
+# This workflow runs on certain conditions to check for and potentially
+# delete container images from the GHCR which no longer have an associated
+# code branch.
+# Requires a PAT with the correct scope set in the secrets
+
 name: Cleanup Image Tags
 
 on:
@@ -13,15 +18,13 @@ on:
       - ".github/scripts/cleanup-tags.py"
       - ".github/scripts/common.py"
 
-env:
-  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
 jobs:
   cleanup:
     name: Cleanup Image Tags
     runs-on: ubuntu-20.04
-    permissions:
-      packages: write
+    env:
+      # Requires a personal access token with the OAuth scope delete:packages
+      TOKEN: ${{ secrets.GHA_CONTAINER_DELETE_TOKEN }}
     steps:
       -
         name: Checkout
@@ -44,5 +47,7 @@ jobs:
           python -m pip install requests
       -
         name: Cleanup feature tags
+        # Only run if the token is not empty
+        if: "${{ env.TOKEN != '' }}"
         run: |
-          python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --loglevel info --delete
+          python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --loglevel info --untagged --delete