From: gap579137 <30596626+gap579137@users.noreply.github.com> Date: Tue, 5 Nov 2024 03:44:44 +0000 (-0600) Subject: Update remove-duplicates.js X-Git-Tag: aggregated-20250518~16 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=6279bcca6d526f194d55b28ed79a648ccf5420de;p=thirdparty%2Fblocklistproject%2Flists.git Update remove-duplicates.js --- diff --git a/scripts/remove-duplicates.js b/scripts/remove-duplicates.js index 2d30fd4..bee6f51 100644 --- a/scripts/remove-duplicates.js +++ b/scripts/remove-duplicates.js @@ -2,13 +2,12 @@ const fs = require("node:fs").promises; const path = require("node:path"); (async () => { + let hasError = false; + try { - // Define the base directory containing .txt files const directoryPath = path.join(__dirname, ".."); - - // Retrieve all .txt files in the directory - const files = (await fs.readdir(directoryPath)).filter((file) => - file.endsWith(".txt") + const files = (await fs.readdir(directoryPath)).filter( + (file) => file.endsWith(".txt") && file !== "everything.txt" ); // Process each file concurrently @@ -16,32 +15,80 @@ const path = require("node:path"); files.map(async (file) => { const filePath = path.join(directoryPath, file); const fileContents = await fs.readFile(filePath, "utf8"); + const lines = fileContents.split("\n"); + + // Collect commented URLs for later validation + const commentedURLs = lines + .filter((line) => line.startsWith("# 0.0.0.0")) + .map((line) => line.split(" ")[2].trim()); + + let isHeaderComplete = false; + + lines.forEach((line, index) => { + // Mark the end of the header section + if (line.startsWith("0.0.0.0")) { + isHeaderComplete = true; + } + + // Check for disallowed "Version" or "Date" in lines + if (line.includes("Version") || line.includes("Date")) { + console.error(`Line ${index + 1} in ${file} must not contain "Version" or "Date".`); + hasError = true; + } - // Initialize a Set to keep track of unique domains - const existingDomains = new Set(); - const filteredLines = fileContents - .split("\n") - .filter((line) => { - // Filter duplicate "0.0.0.0" entries - if (line.startsWith("0.0.0.0 ")) { - const domain = line.slice(8); // Extract domain after "0.0.0.0 " - if (existingDomains.has(domain)) { - return false; // Exclude duplicate - } - existingDomains.add(domain); // Add unique domain to Set + // Validate line format: each should start with "#" or "0.0.0.0 " + if (line.trim() && !line.startsWith("#") && !line.startsWith("0.0.0.0 ")) { + console.error(`Line ${index + 1} in ${file} must start with "#" or "0.0.0.0 ".`); + hasError = true; + } + + // Ensure URLs in lines starting with "0.0.0.0 " are lowercase + if (line.startsWith("0.0.0.0 ")) { + const url = line.split(" ")[1].split("#")[0].trim(); + if (url.toLowerCase() !== url) { + console.error(`Line ${index + 1} in ${file} URL "${url}" must be lowercase.`); + hasError = true; + } + } + + // Check for a space after "#" in comments + if (line.startsWith("#") && line.length > 1 && line[1] !== " ") { + console.error(`Line ${index + 1} in ${file} should have a space after "#".`); + hasError = true; + } + + // Validate lines after the header with "#" start with "# 0.0.0.0" or "# NOTE:" + if (isHeaderComplete && line.startsWith("#") && !line.startsWith("# 0.0.0.0") && !line.startsWith("# NOTE:")) { + console.error(`Line ${index + 1} in ${file} should start with "# 0.0.0.0" or "# NOTE:" after the header.`); + hasError = true; + } + + // Ensure no active URL matches a commented-out URL + if (line.startsWith("0.0.0.0 ")) { + const url = line.split(" ")[1].split("#")[0].trim(); + if (commentedURLs.includes(url)) { + console.error(`Line ${index + 1} in ${file} URL "${url}" is commented out elsewhere. Remove the duplicate or uncomment.`); + hasError = true; } - return true; // Include non-duplicate or non-"0.0.0.0" lines - }); + } - // Write the filtered content back to the file - await fs.writeFile(filePath, filteredLines.join("\n"), "utf8"); + // Check URLs for whitespace + if (line.startsWith("0.0.0.0 ")) { + const url = line.split(" ")[1].split("#")[0].trim(); + if (/\s/.test(url)) { + console.error(`Line ${index + 1} in ${file} URL "${url}" contains whitespace.`); + hasError = true; + } + } + }); - console.log(`Processed and removed duplicates in: ${file}`); + console.log(`Checked ${file} - completed validation.`); }) ); - console.log("All files processed successfully."); + process.exit(hasError ? 1 : 0); } catch (error) { - console.error("Error processing files:", error); + console.error("An error occurred during file processing:", error); + process.exit(1); } })();