git.ipfire.org Git - thirdparty/blocklistproject/lists.git/commitdiff
Update remove-duplicates.js
author gap579137 <30596626+gap579137@users.noreply.github.com>
Tue, 5 Nov 2024 03:43:33 +0000 (21:43 -0600)
committer GitHub <noreply@github.com>
Tue, 5 Nov 2024 03:43:33 +0000 (21:43 -0600)
scripts/remove-duplicates.js

index 02586f11311c7164b8f9e67377b1b8b2ad2f185e..2d30fd4eec41fccc3f6ba09acf4fd6ce35fe9d58 100644 (file)
@@ -3,33 +3,44 @@ const path = require("node:path");
 
 (async () => {
        try {
+               // Define the base directory containing .txt files
                const directoryPath = path.join(__dirname, "..");
+
+               // Retrieve all .txt files in the directory
                const files = (await fs.readdir(directoryPath)).filter((file) =>
-                       file.endsWith(".txt"),
+                       file.endsWith(".txt")
                );
 
+               // Process each file concurrently
                await Promise.all(
                        files.map(async (file) => {
                                const filePath = path.join(directoryPath, file);
                                const fileContents = await fs.readFile(filePath, "utf8");
 
-                               const lines = fileContents.split("\n");
+                               // Initialize a Set to keep track of unique domains
                                const existingDomains = new Set();
-                               const filteredLines = lines.filter((line) => {
-                                       if (line.startsWith("0.0.0.0 ")) {
-                                               const domain = line.replace("0.0.0.0 ", "");
-                                               if (!existingDomains.has(domain)) {
-                                                       existingDomains.add(domain);
-                                                       return true;
+                               const filteredLines = fileContents
+                                       .split("\n")
+                                       .filter((line) => {
+                                               // Filter duplicate "0.0.0.0" entries
+                                               if (line.startsWith("0.0.0.0 ")) {
+                                                       const domain = line.slice(8); // Extract domain after "0.0.0.0 "
+                                                       if (existingDomains.has(domain)) {
+                                                               return false; // Exclude duplicate
+                                                       }
+                                                       existingDomains.add(domain); // Add unique domain to Set
                                                }
-                                               return false;
-                                       }
-                                       return true;
-                               });
+                                               return true; // Include non-duplicate or non-"0.0.0.0" lines
+                                       });
 
+                               // Write the filtered content back to the file
                                await fs.writeFile(filePath, filteredLines.join("\n"), "utf8");
-                       }),
+
+                               console.log(`Processed and removed duplicates in: ${file}`);
+                       })
                );
+
+               console.log("All files processed successfully.");
        } catch (error) {
                console.error("Error processing files:", error);
        }