From d49f890c740181cd1fb79b03a2bf7e3bd48b404f Mon Sep 17 00:00:00 2001 From: gap579137 Date: Sat, 17 May 2025 22:12:07 -0500 Subject: [PATCH] rework --- .github/workflows/aggregate.yml | 49 +++++++-------------------------- scripts/aggregate.py | 26 +++++++++++++++-- 2 files changed, 33 insertions(+), 42 deletions(-) diff --git a/.github/workflows/aggregate.yml b/.github/workflows/aggregate.yml index 880605a..506d8e8 100644 --- a/.github/workflows/aggregate.yml +++ b/.github/workflows/aggregate.yml @@ -19,31 +19,36 @@ jobs: - name: Generate aggregated lists run: python3 scripts/aggregate.py + - name: Verify output files exist + run: | + mkdir -p releases + if [ ! -f releases/aggregated-hosts.txt ] || [ ! -f releases/aggregated-dnsmasq.conf ] || [ ! -f releases/aggregated-adblock.txt ]; then + echo "Error: One or more output files are missing!" + exit 1 + fi + - name: Commit outputs run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" git add releases/ # only commit if there are changes - git diff --quiet || git commit -m "chore: update aggregated lists" + git diff --quiet && echo "No changes to commit" || git commit -m "chore: update aggregated lists" git push release: needs: build runs-on: ubuntu-latest steps: - # 1) Checkout with full history and push rights - name: Checkout code uses: actions/checkout@v3 with: fetch-depth: 0 # so tags and history are available persist-credentials: true # so we can push - # 2) Compute YYYYMMDD - name: Set release date run: echo "RELEASE_DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - # 3) Create & push the tag - name: Create Git tag run: | git config user.name "github-actions[bot]" @@ -51,40 +56,6 @@ jobs: git tag aggregated-${{ env.RELEASE_DATE }} git push origin aggregated-${{ env.RELEASE_DATE }} - # 4) Now GitHub Release will find that tag - - name: Create GitHub Release - id: create_release - uses: actions/create-release@v1 - with: - tag_name: aggregated-${{ env.RELEASE_DATE }} - release_name: Aggregated Lists ${{ env.RELEASE_DATE }} - body: | - Aggregated lists for ${{ env.RELEASE_DATE }}. - draft: false - prerelease: false - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - # 5) Upload the three aggregated files - - name: Upload release assets - uses: softprops/action-gh-release@v1 - with: - files: | - releases/aggregated-hosts.txt - releases/aggregated-dnsmasq.conf - releases/aggregated-adblock.txt - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - needs: build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - # 1) Generate YYYYMMDD and put it in $GITHUB_ENV - - name: Set release date - run: echo "RELEASE_DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - - # 2) Create the release with a valid tag - name: Create GitHub Release id: create_release uses: actions/create-release@v1 @@ -98,7 +69,6 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # 3) Attach your three files as assets - name: Upload release assets uses: softprops/action-gh-release@v1 with: @@ -106,5 +76,6 @@ jobs: releases/aggregated-hosts.txt releases/aggregated-dnsmasq.conf releases/aggregated-adblock.txt + tag_name: aggregated-${{ env.RELEASE_DATE }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/scripts/aggregate.py b/scripts/aggregate.py index b36b56a..e3c59e8 100755 --- a/scripts/aggregate.py +++ b/scripts/aggregate.py @@ -1,15 +1,29 @@ #!/usr/bin/env python3 import os import glob +import re def load_domains(path): domains = set() with open(path, encoding='utf-8') as f: for line in f: + # Remove comments line = line.split('#',1)[0].strip() + + # Skip empty lines or comment lines if not line or line.startswith('!') or line.startswith('['): continue - domains.add(line) + + # Extract domain from "0.0.0.0 domain.com" format + if line.startswith('0.0.0.0 '): + domain = line[8:].strip() + # Ensure it's a valid domain + if domain and ' ' not in domain: + domains.add(domain) + else: + # Try to extract domain if it's just a raw domain + if re.match(r'^[a-zA-Z0-9][-a-zA-Z0-9.]*\.[a-zA-Z]{2,}$', line): + domains.add(line) return domains def write_hosts(domains, out_path): @@ -32,13 +46,19 @@ def write_adblock(domains, out_path): if __name__ == '__main__': base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) - lists_dir = os.path.join(base_dir, 'Lists') + # The list files are in the repository root, not in a 'Lists' subdirectory + lists_dir = base_dir out_dir = os.path.join(base_dir, 'releases') os.makedirs(out_dir, exist_ok=True) domains = set() for txt in glob.glob(os.path.join(lists_dir, '*.txt')): - domains |= load_domains(txt) + # Skip everything.txt to avoid duplicates + if not os.path.basename(txt) == 'everything.txt': + print(f'Processing {os.path.basename(txt)}...') + file_domains = load_domains(txt) + domains.update(file_domains) + print(f' Added {len(file_domains)} domains') write_hosts(domains, os.path.join(out_dir, 'aggregated-hosts.txt')) write_dnsmasq(domains, os.path.join(out_dir, 'aggregated-dnsmasq.conf')) -- 2.47.2