From: Marc Foley
Date: Fri, 8 Mar 2024 15:45:44 +0000 (+0000)
Subject: knowledge_graph: Do not check url if domain is in whitelist
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F7373%2Fhead;p=thirdparty%2Fgoogle%2Ffonts.git

knowledge_graph: Do not check url if domain is in whitelist

Replace the per-URL whitelist with a per-domain one so that any link on a
whitelisted host skips the HEAD request. The host is taken from
urlparse(url).hostname and only a leading "www." is stripped before the
lookup. Requests for all other URLs now time out after 30 seconds.

Also re-run the CI workflow when knowledge_graph.py itself changes; the
path filter is written without a leading slash, like the ci.yaml entry
next to it.
---

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 4c657f5736..4cf0fe3046 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -9,6 +9,7 @@ on:
       - "cc-by-sa/*"
       - "to_*.txt"
       - ".github/workflows/ci.yaml"
+      - ".github/workflows/knowledge_graph.py"
   pull_request:
     branches: ['**']
     paths:
diff --git a/.github/workflows/knowledge_graph.py b/.github/workflows/knowledge_graph.py
index ee5899617d..edc82e0512 100644
--- a/.github/workflows/knowledge_graph.py
+++ b/.github/workflows/knowledge_graph.py
@@ -12,6 +12,7 @@ import sys
 from typing import Callable, Iterable, List, Mapping, NamedTuple, Optional, Tuple, Set, Union
 import requests
 from functools import lru_cache
+from urllib.parse import urlparse
 
 MAX_RASTER_IMAGE_SIZE_KB = 800
 
@@ -192,46 +193,45 @@ def _check_md_file_contents(repo_root: Path, md_file: Path, ast: List[MdValue])
 def _check_outbound_link(url: str):
     # Following urls work correctly on a web browser but raise a 400 code when using python requests
     whitelist = frozenset([
-        "https://www.jessicahische.is/talkingtype",
-        "http://www.layoutgridcalculator.com/typographic-scale/",
-        "https://www.figma.com/community/plugin/1088610476491668236/Material-Symbols",
-        "https://www.researchgate.net/publication/274013793_Legibility_in_Industrial_AR_Text_Style_Color_Coding_and_Illuminance",
-        "https://www.researchgate.net/figure/Biblia-latina-The-42-Line-Bible-Mainz-Johannes-Gutenberg-for-Johann-Fust-ca_fig3_317915502",
-        "https://www.nytimes.com/2003/10/21/business/the-media-business-a-face-lift-for-the-times-typographically-that-is.html",
-        "https://www.circuitousroot.com/artifice/letters/pantocut/benton/vertical/index.html",
-        "https://www.circuitousroot.com/artifice/letters/press/typemaking/after-casting/mortising/rouse/inst-and-print/index.html",
-        "https://www.paulshawletterdesign.com/2011/11/tutorial-no-6%E2%80%94tight-but-not-touching-kerning/",
-        "https://codepen.io/mandymichael/pen/pxXNbr",
-        "https://typetura.com",
-        "https://gigapress.net/reduce-http-requests/",
-        "https://help.figma.com/hc/en-us/articles/5579502031511-Use-variable-fonts#Replace_static_fonts",
-        "https://twitter.com/romanshamin_en/status/1562801657691672576",
-        "https://www.sciencedirect.com/science/article/pii/S0042698907005561",
-        "https://doi.org/10.1002/rrq.411",
-        "https://www.webmd.com/healthy-aging/news/20140318/color-vision-tends-to-fade-with-age-study#1",
-        "https://support.google.com/accessibility/android/answer/12159181?hl=en",
-        "http://www.languagegeek.com/typography/syllabics/syllabic_variation.pdf",
-        "https://www.freepik.com/free-photos-vectors/travel",
-        "https://psycnet.apa.org/record/2018-13691-001",
-        "https://www.cortezlawfirmpllc.com/wp-content/uploads/sites/1600711/2020/05/glare_congressional_report.pdf",
-        "http://kupferschrift.de/cms/2012/03/on-classifications/",
-        "https://medium.engineering/typography-is-impossible-5872b0c7f891",
-        "https://medium.com/microsoft-design/leading-trim-the-future-of-digital-typesetting-d082d84b202",
-        "https://medium.com/eightshapes-llc/space-in-design-systems-188bcbae0d62#:~:text=Solve%20Collisions%20like%20Line%20Height%20Systematically",
-        "https://nedwin.medium.com/the-1-5m-napkin-abd2702927d0",
-        "https://www.colourblindawareness.org/colour-blindness/types-of-colour-blindness/",
-        "https://medium.com/the-readability-group/whats-in-a-word-53bcf217d5c1",
-        "https://medium.com/the-readability-group/a-guide-to-understanding-what-makes-a-typeface-accessible-and-how-to-make-informed-decisions-9e5c0b9040a0",
-        "https://medium.com/@mcpflug/whats-in-a-name-the-perception-of-pro-7fffa6cddcb8",
-        "https://medium.com/@tilougarou/the-typographic-scale-reworked-a1b441b2beb2",
+        'circuitousroot.com',
+        'codepen.io',
+        'colourblindawareness.org',
+        'cortezlawfirmpllc.com',
+        'doi.org',
+        'figma.com',
+        'freepik.com',
+        'gigapress.net',
+        'help.figma.com',
+        'kupferschrift.de',
+        'languagegeek.com',
+        'layoutgridcalculator.com',
+        'medium.com',
+        'medium.engineering',
+        'nedwin.medium.com',
+        'nytimes.com',
+        'paulshawletterdesign.com',
+        'psycnet.apa.org',
+        'researchgate.net',
+        'sciencedirect.com',
+        'support.google.com',
+        'twitter.com',
+        'typetura.com',
+        'webmd.com'
     ])
     # Following urls will be fixed at a later date. If the CI is failing and a suitable
     # replacement url cannot be found, please add them to this set.
-    to_fix = frozenset()
-    if url in whitelist | to_fix:
+    to_fix = frozenset([
+        "jessicahische.is",
+    ])
+    # Match on the bare host name: `hostname` is lower-cased and excludes any
+    # port or credentials, and only a leading "www." label is stripped.
+    host = urlparse(url).hostname or ""
+    if host.startswith("www."):
+        host = host[len("www."):]
+    if host in whitelist | to_fix:
         return True
 
-    response = requests.head(url, allow_redirects=True)
+    response = requests.head(url, allow_redirects=True, timeout=30)
     if not response.ok:
         print(f"INVALID url {url}' returned response status code '{response.status_code}'")
     return response.ok