]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fine, I made my own GitHub API interface. With blackjack and ... 1140/head
authorTrenton Holmes <holmes.trenton@gmail.com>
Fri, 17 Jun 2022 15:02:34 +0000 (08:02 -0700)
committerTrenton Holmes <holmes.trenton@gmail.com>
Fri, 17 Jun 2022 15:11:15 +0000 (08:11 -0700)
.github/scripts/cleanup-tags.py [changed mode: 0755->0644]
.github/scripts/common.py
.github/workflows/cleanup-tags.yml

old mode 100755 (executable)
new mode 100644 (file)
index f34e16c..a77b5fa
-#!/usr/bin/env python3
-"""
-When a feature branch is created, a new GitHub container is built and tagged
-with the feature branch name.  When a feature branch is deleted, either through
-a merge or deletion, the old image tag will still exist.
-
-Though this isn't a problem for storage size, etc, it does lead to a long list
-of tags which are no longer relevant and the last released version is pushed
- further and further down that list.
-
-This script utlizes the GitHub API (through the gh cli application) to list the
-package versions (aka tags) and the repository branches.  Then it removes feature
-tags which have no matching branch
-
-This pruning is applied to the primary package, the frontend builder package and the
-frontend build cache package.
-
-"""
-import argparse
 import logging
-import os.path
-import pprint
-from typing import Dict
+import os
+from argparse import ArgumentParser
 from typing import Final
 from typing import List
+from urllib.parse import quote
 
+import requests
 from common import get_log_level
-from ghapi.all import GhApi
-from ghapi.all import paged
-
-
-def _get_feature_packages(
-    logger: logging.Logger,
-    api: GhApi,
-    is_org_repo: bool,
-    repo_owner: str,
-    package_name: str,
-) -> Dict:
-    """
-    Uses the GitHub packages API endpoint data filter to containers
-    which have a tag starting with "feature-"
-    """
-
-    # Get all package versions
-    pkg_versions = []
-    if is_org_repo:
-
-        for pkg_version in paged(
-            api.packages.get_all_package_versions_for_package_owned_by_org,
-            org=repo_owner,
-            package_type="container",
-            package_name=package_name,
-        ):
-            pkg_versions.extend(pkg_version)
-    else:
-        for pkg_version in paged(
-            api.packages.get_all_package_versions_for_package_owned_by_authenticated_user,  # noqa: E501
-            package_type="container",
-            package_name=package_name,
-        ):
-            pkg_versions.extend(pkg_version)
-
-    logger.debug(f"Found {len(pkg_versions)} package versions for {package_name}")
-
-    # Filter to just those containers tagged "feature-"
-    feature_versions = {}
-
-    for item in pkg_versions:
-        is_feature_version = False
-        feature_tag_name = None
-        if (
-            "metadata" in item
-            and "container" in item["metadata"]
-            and "tags" in item["metadata"]["container"]
-        ):
-            for tag in item["metadata"]["container"]["tags"]:
-                if tag.startswith("feature-"):
-                    feature_tag_name = tag
-                    is_feature_version = True
-        if is_feature_version:
-            logger.info(
-                f"Located feature tag: {feature_tag_name} for image {package_name}",
-            )
-            # logger.debug(pprint.pformat(item, indent=2))
-            feature_versions[feature_tag_name] = item
+
+logger = logging.getLogger("cleanup-tags")
+
+
+class GithubContainerRegistry:
+    def __init__(
+        self,
+        session: requests.Session,
+        token: str,
+        owner_or_org: str,
+    ):
+        self._session: requests.Session = session
+        self._token = token
+        self._owner_or_org = owner_or_org
+        self._BRANCHES_ENDPOINT = "https://api.github.com/repos/{OWNER}/{REPO}/branches"
+        if self._owner_or_org == "paperless-ngx":
+            self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
+            self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/orgs/{ORG}/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
         else:
-            logger.debug(f"Filtered {pprint.pformat(item, indent=2)}")
+            self._PACKAGES_VERSIONS_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions"
+            self._PACKAGE_VERSION_DELETE_ENDPOINT = "https://api.github.com/user/packages/{PACKAGE_TYPE}/{PACKAGE_NAME}/versions/{PACKAGE_VERSION_ID}"
+
+    def __enter__(self):
+        self._session.headers.update(
+            {
+                "Accept": "application/vnd.github.v3+json",
+                "Authorization": f"token {self._token}",
+            },
+        )
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if "Accept" in self._session.headers:
+            del self._session.headers["Accept"]
+        if "Authorization" in self._session.headers:
+            del self._session.headers["Authorization"]
+
+    def _read_all_pages(self, endpoint):
+        internal_data = []
+
+        while True:
+            resp = self._session.get(endpoint)
+            if resp.status_code == 200:
+                internal_data += resp.json()
+                if "next" in resp.links:
+                    endpoint = resp.links["next"]["url"]
+                else:
+                    logger.debug("Exiting pagination loop")
+                    break
+            else:
+                logger.warning(f"Request to {endpoint} return HTTP {resp.status_code}")
+                break
+
+        return internal_data
+
+    def get_branches(self, repo: str):
+        endpoint = self._BRANCHES_ENDPOINT.format(OWNER=self._owner_or_org, REPO=repo)
+        internal_data = self._read_all_pages(endpoint)
+        return internal_data
+
+    def filter_branches_by_name_pattern(self, branch_data, pattern: str):
+        matches = {}
+
+        for branch in branch_data:
+            if branch["name"].startswith(pattern):
+                matches[branch["name"]] = branch
+
+        return matches
+
+    def get_package_versions(
+        self,
+        package_name: str,
+        package_type: str = "container",
+    ) -> List:
+        package_name = quote(package_name, safe="")
+        endpoint = self._PACKAGES_VERSIONS_ENDPOINT.format(
+            ORG=self._owner_or_org,
+            PACKAGE_TYPE=package_type,
+            PACKAGE_NAME=package_name,
+        )
 
-    logger.info(
-        f"Found {len(feature_versions)} package versions for"
-        f" {package_name} with feature tags",
-    )
+        internal_data = self._read_all_pages(endpoint)
+
+        return internal_data
+
+    def filter_packages_by_tag_pattern(self, package_data, pattern: str):
+        matches = {}
+
+        for package in package_data:
+            if "metadata" in package and "container" in package["metadata"]:
+                container_metadata = package["metadata"]["container"]
+                if "tags" in container_metadata:
+                    container_tags = container_metadata["tags"]
+                    for tag in container_tags:
+                        if tag.startswith(pattern):
+                            matches[tag] = package
+                            break
+
+        return matches
+
+    def filter_packages_untagged(self, package_data):
+        matches = {}
+
+        for package in package_data:
+            if "metadata" in package and "container" in package["metadata"]:
+                container_metadata = package["metadata"]["container"]
+                if "tags" in container_metadata:
+                    container_tags = container_metadata["tags"]
+                    if not len(container_tags):
+                        matches[package["name"]] = package
+
+        return matches
+
+    def delete_package_version(self, package_name, package_data):
+        package_name = quote(package_name, safe="")
+        endpoint = self._PACKAGE_VERSION_DELETE_ENDPOINT.format(
+            ORG=self._owner_or_org,
+            PACKAGE_TYPE=package_data["metadata"]["package_type"],
+            PACKAGE_NAME=package_name,
+            PACKAGE_VERSION_ID=package_data["id"],
+        )
+        resp = self._session.delete(endpoint)
+        if resp.status_code != 204:
+            logger.warning(
+                f"Request to delete {endpoint} returned HTTP {resp.status_code}",
+            )
 
-    return feature_versions
 
+class DockerHubContainerRegistery:
+    def __init__(self):
+        pass
 
-def _main():
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+    def get_image_versions(self) -> List:
+        return []
 
-    parser = argparse.ArgumentParser(
+    def delete_image_version(self):
+        pass
+
+
+def _main():
+    parser = ArgumentParser(
         description="Using the GitHub API locate and optionally delete container"
         " tags which no longer have an associated feature branch",
     )
@@ -108,6 +165,14 @@ def _main():
         help="If provided, actually delete the container tags",
     )
 
+    # TODO There's a lot of untagged images, do those need to stay for anything?
+    parser.add_argument(
+        "--untagged",
+        action="store_true",
+        default=False,
+        help="If provided, delete untagged containers as well",
+    )
+
     parser.add_argument(
         "--loglevel",
         default="info",
@@ -122,89 +187,82 @@ def _main():
         format="%(asctime)s %(levelname)-8s %(message)s",
     )
 
-    logger = logging.getLogger("cleanup-tags")
-
-    repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
     repo_owner: Final[str] = os.environ["GITHUB_REPOSITORY_OWNER"]
+    repo: Final[str] = os.environ["GITHUB_REPOSITORY"]
+    gh_token: Final[str] = os.environ["GITHUB_TOKEN"]
 
-    is_org_repo: Final[bool] = repo_owner == "paperless-ngx"
-    dry_run: Final[bool] = not args.delete
-
-    logger.debug(f"Org Repo? {is_org_repo}")
-    logger.debug(f"Dry Run? {dry_run}")
-
-    api = GhApi(
-        owner=repo_owner,
-        repo=os.path.basename(repo),
-        token=os.environ["GITHUB_TOKEN"],
-    )
-
-    pkg_list: Final[List[str]] = [
-        "paperless-ngx",
-        # TODO: It would be nice to cleanup additional packages, but we can't
-        # see https://github.com/fastai/ghapi/issues/84
-        # "builder/frontend",
-        # "builder-frontend-cache",
-    ]
-
-    # Get the list of current "feature-" branches
-    feature_branch_info = api.list_branches(prefix="feature-")
-    feature_branch_names = []
-    for branch in feature_branch_info:
-        name_only = branch["ref"].removeprefix("refs/heads/")
-        logger.info(f"Located feature branch: {name_only}")
-        feature_branch_names.append(name_only)
-
-    logger.info(f"Located {len(feature_branch_names)} feature branches")
-
-    # TODO The deletion doesn't yet actually work
-    # See https://github.com/fastai/ghapi/issues/132
-    # This would need to be updated to use gh cli app or requests or curl
-    # or something
-    if is_org_repo:
-        endpoint = (
-            "https://api.github.com/orgs/{ORG}/packages/container/{name}/versions/{id}"
-        )
-    else:
-        endpoint = "https://api.github.com/user/packages/container/{name}/{id}"
-
-    for package_name in pkg_list:
-
-        logger.info(f"Processing image {package_name}")
+    with requests.session() as sess:
+        with GithubContainerRegistry(sess, gh_token, repo_owner) as gh_api:
+            all_branches = gh_api.get_branches("paperless-ngx")
+            logger.info(f"Located {len(all_branches)} branches of {repo_owner}/{repo} ")
 
-        # Get the list of images tagged with "feature-"
-        feature_packages = _get_feature_packages(
-            logger,
-            api,
-            is_org_repo,
-            repo_owner,
-            package_name,
-        )
+            feature_branches = gh_api.filter_branches_by_name_pattern(
+                all_branches,
+                "feature-",
+            )
+            logger.info(f"Located {len(feature_branches)} feature branches")
 
-        # Get the set of container tags without matching feature branches
-        to_delete = list(set(feature_packages.keys()) - set(feature_branch_names))
+            for package_name in ["paperless-ngx", "paperless-ngx/builder/cache/app"]:
 
-        for container_tag in to_delete:
-            container_info = feature_packages[container_tag]
+                all_package_versions = gh_api.get_package_versions(package_name)
+                logger.info(
+                    f"Located {len(all_package_versions)} versions of package {package_name}",
+                )
 
-            formatted_endpoint = endpoint.format(
-                ORG=repo_owner,
-                name=package_name,
-                id=container_info["id"],
-            )
+                packages_tagged_feature = gh_api.filter_packages_by_tag_pattern(
+                    all_package_versions,
+                    "feature-",
+                )
+                logger.info(
+                    f'Located {len(packages_tagged_feature)} versions of package {package_name} tagged "feature-"',
+                )
 
-            if dry_run:
+                untagged_packages = gh_api.filter_packages_untagged(
+                    all_package_versions,
+                )
                 logger.info(
-                    f"Would delete {package_name}:{container_tag} with"
-                    f" id: {container_info['id']}",
+                    f"Located {len(untagged_packages)} untagged versions of package {package_name}",
+                )
+
+                to_delete = list(
+                    set(packages_tagged_feature.keys()) - set(feature_branches.keys()),
                 )
-                # logger.debug(formatted_endpoint)
-            else:
                 logger.info(
-                    f"Deleting {package_name}:{container_tag} with"
-                    f" id: {container_info['id']}",
+                    f"Located {len(to_delete)} versions of package {package_name} to delete",
                 )
 
+                for tag_to_delete in to_delete:
+                    package_version_info = packages_tagged_feature[tag_to_delete]
+
+                    logger.info(
+                        f"Deleting {tag_to_delete} (id {package_version_info['id']})",
+                    )
+                    if args.delete:
+                        gh_api.delete_package_version(
+                            package_name,
+                            package_version_info,
+                        )
+
+                if args.untagged:
+                    logger.info(f"Deleting untagged packages of {package_name}")
+                    for to_delete_name in untagged_packages:
+                        to_delete_version = untagged_packages[to_delete_name]
+                        logger.info(f"Deleting id {to_delete_version['id']}")
+                        if args.delete:
+                            gh_api.delete_package_version(
+                                package_name,
+                                to_delete_version,
+                            )
+
+        with DockerHubContainerRegistery() as dh_api:
+            docker_hub_image_version = dh_api.get_image_versions()
+
+            # TODO
+            docker_hub_to_delete = []
+
+            for x in docker_hub_to_delete:
+                dh_api.delete_image_version()
+
 
 if __name__ == "__main__":
     _main()
index a64fa929ae77c4237a10f4ce3116b94431b3b5b6..f0302e79ab7ecb761a977155e986149f0628e791 100644 (file)
@@ -40,5 +40,5 @@ def get_log_level(args) -> int:
     }
     level = levels.get(args.loglevel.lower())
     if level is None:
-        raise ArgumentError(f"{args.loglevel} is not a valid level")
+        level = logging.INFO
     return level
index 3f5fadaaf956c5fa7a7e1406eb488ea419038dbb..5342d040a549c5fff6625a7f493258203420dcdb 100644 (file)
@@ -39,9 +39,9 @@ jobs:
         with:
           python-version: "3.9"
       -
-        name: Install fastai GitHub API
+        name: Install requests
         run: |
-          python -m pip install ghapi requests
+          python -m pip install requests
       -
         name: Cleanup feature tags
         run: |