git.ipfire.org Git - thirdparty/blocklistproject/lists.git/commitdiff
Update remove-duplicates.js
author gap579137 <30596626+gap579137@users.noreply.github.com>
Tue, 5 Nov 2024 03:44:44 +0000 (21:44 -0600)
committer GitHub <noreply@github.com>
Tue, 5 Nov 2024 03:44:44 +0000 (21:44 -0600)
scripts/remove-duplicates.js

index 2d30fd4eec41fccc3f6ba09acf4fd6ce35fe9d58..bee6f513c0acce512678403530c83bb8b4bd9890 100644 (file)
@@ -2,13 +2,12 @@ const fs = require("node:fs").promises;
 const path = require("node:path");
 
 (async () => {
+       let hasError = false;
+
        try {
-               // Define the base directory containing .txt files
                const directoryPath = path.join(__dirname, "..");
-
-               // Retrieve all .txt files in the directory
-               const files = (await fs.readdir(directoryPath)).filter((file) =>
-                       file.endsWith(".txt")
+               const files = (await fs.readdir(directoryPath)).filter(
+                       (file) => file.endsWith(".txt") && file !== "everything.txt"
                );
 
                // Process each file concurrently
@@ -16,32 +15,80 @@ const path = require("node:path");
                        files.map(async (file) => {
                                const filePath = path.join(directoryPath, file);
                                const fileContents = await fs.readFile(filePath, "utf8");
+                               const lines = fileContents.split("\n");
+
+                               // Collect commented URLs for later validation
+                               const commentedURLs = lines
+                                       .filter((line) => line.startsWith("# 0.0.0.0"))
+                                       .map((line) => line.split(" ")[2].trim());
+
+                               let isHeaderComplete = false;
+
+                               lines.forEach((line, index) => {
+                                       // Mark the end of the header section
+                                       if (line.startsWith("0.0.0.0")) {
+                                               isHeaderComplete = true;
+                                       }
+
+                                       // Check for disallowed "Version" or "Date" in lines
+                                       if (line.includes("Version") || line.includes("Date")) {
+                                               console.error(`Line ${index + 1} in ${file} must not contain "Version" or "Date".`);
+                                               hasError = true;
+                                       }
 
-                               // Initialize a Set to keep track of unique domains
-                               const existingDomains = new Set();
-                               const filteredLines = fileContents
-                                       .split("\n")
-                                       .filter((line) => {
-                                               // Filter duplicate "0.0.0.0" entries
-                                               if (line.startsWith("0.0.0.0 ")) {
-                                                       const domain = line.slice(8); // Extract domain after "0.0.0.0 "
-                                                       if (existingDomains.has(domain)) {
-                                                               return false; // Exclude duplicate
-                                                       }
-                                                       existingDomains.add(domain); // Add unique domain to Set
+                                       // Validate line format: each should start with "#" or "0.0.0.0 "
+                                       if (line.trim() && !line.startsWith("#") && !line.startsWith("0.0.0.0 ")) {
+                                               console.error(`Line ${index + 1} in ${file} must start with "#" or "0.0.0.0 ".`);
+                                               hasError = true;
+                                       }
+
+                                       // Ensure URLs in lines starting with "0.0.0.0 " are lowercase
+                                       if (line.startsWith("0.0.0.0 ")) {
+                                               const url = line.split(" ")[1].split("#")[0].trim();
+                                               if (url.toLowerCase() !== url) {
+                                                       console.error(`Line ${index + 1} in ${file} URL "${url}" must be lowercase.`);
+                                                       hasError = true;
+                                               }
+                                       }
+
+                                       // Check for a space after "#" in comments
+                                       if (line.startsWith("#") && line.length > 1 && line[1] !== " ") {
+                                               console.error(`Line ${index + 1} in ${file} should have a space after "#".`);
+                                               hasError = true;
+                                       }
+
+                                       // Validate lines after the header with "#" start with "# 0.0.0.0" or "# NOTE:"
+                                       if (isHeaderComplete && line.startsWith("#") && !line.startsWith("# 0.0.0.0") && !line.startsWith("# NOTE:")) {
+                                               console.error(`Line ${index + 1} in ${file} should start with "# 0.0.0.0" or "# NOTE:" after the header.`);
+                                               hasError = true;
+                                       }
+
+                                       // Ensure no active URL matches a commented-out URL
+                                       if (line.startsWith("0.0.0.0 ")) {
+                                               const url = line.split(" ")[1].split("#")[0].trim();
+                                               if (commentedURLs.includes(url)) {
+                                                       console.error(`Line ${index + 1} in ${file} URL "${url}" is commented out elsewhere. Remove the duplicate or uncomment.`);
+                                                       hasError = true;
                                                }
-                                               return true; // Include non-duplicate or non-"0.0.0.0" lines
-                                       });
+                                       }
 
-                               // Write the filtered content back to the file
-                               await fs.writeFile(filePath, filteredLines.join("\n"), "utf8");
+                                       // Check URLs for whitespace
+                                       if (line.startsWith("0.0.0.0 ")) {
+                                               const url = line.split(" ")[1].split("#")[0].trim();
+                                               if (/\s/.test(url)) {
+                                                       console.error(`Line ${index + 1} in ${file} URL "${url}" contains whitespace.`);
+                                                       hasError = true;
+                                               }
+                                       }
+                               });
 
-                               console.log(`Processed and removed duplicates in: ${file}`);
+                               console.log(`Checked ${file} - completed validation.`);
                        })
                );
 
-               console.log("All files processed successfully.");
+               process.exit(hasError ? 1 : 0);
        } catch (error) {
-               console.error("Error processing files:", error);
+               console.error("An error occurred during file processing:", error);
+               process.exit(1);
        }
 })();