From: Michael Tremer Date: Wed, 20 Dec 2023 12:47:21 +0000 (+0000) Subject: docs: Add a way to extract linked images X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5ab706510f95afe43ebd10bfd4a2e4c8bf1c0d4d;p=ipfire.org.git docs: Add a way to extract linked images Signed-off-by: Michael Tremer --- diff --git a/src/backend/wiki.py b/src/backend/wiki.py index 0a2d925a..2e49980d 100644 --- a/src/backend/wiki.py +++ b/src/backend/wiki.py @@ -258,9 +258,7 @@ class Wiki(misc.Object): filename, author.uid, address, mimetype, blob.id, len(data)) def render(self, path, text): - r = WikiRenderer(self.backend, path) - - return r.render(text) + return WikiRenderer(self.backend, path, text) class Page(misc.Object): @@ -352,7 +350,15 @@ class Page(misc.Object): lines.append(line) - return self.backend.wiki.render(self.page, "\n".join(lines)) + renderer = self.backend.wiki.render(self.page, "\n".join(lines)) + + return renderer.html + + @property + def images(self): + renderer = self.backend.wiki.render(self.page, self.markdown) + + return renderer.images @property def timestamp(self): @@ -576,31 +582,6 @@ class File(misc.Object): return thumbnail -class PrettyLinksExtension(markdown.extensions.Extension): - def extendMarkdown(self, md): - # Create links to Bugzilla - md.preprocessors.register(BugzillaLinksPreprocessor(md), "bugzilla", 10) - - # Create links to CVE - md.preprocessors.register(CVELinksPreprocessor(md), "cve", 10) - - -class BugzillaLinksPreprocessor(markdown.preprocessors.Preprocessor): - regex = re.compile(r"(?:#(\d{5,}))", re.I) - - def run(self, lines): - for line in lines: - yield self.regex.sub(r"[#\1](https://bugzilla.ipfire.org/show_bug.cgi?id=\1)", line) - - -class CVELinksPreprocessor(markdown.preprocessors.Preprocessor): - regex = re.compile(r"(?:CVE)[\s\-](\d{4}\-\d+)") - - def run(self, lines): - for line in lines: - yield self.regex.sub(r"[CVE-\1](https://cve.mitre.org/cgi-bin/cvename.cgi?name=\1)", line) - - class WikiRenderer(misc.Object): 
schemas = ( "ftp://", @@ -614,27 +595,32 @@ class WikiRenderer(misc.Object): ) # Links - links = re.compile(r"<a href=\"(.*?)\">(.*?)</a>") + _links = re.compile(r"<a href=\"(.*?)\">(.*?)</a>") # Images - images = re.compile(r"<img alt(?:=\"(.*?)\")? src=\"(.*?)\" (?:title=\"(.*?)\" )?/>") - - # Markdown Renderer - renderer = markdown.Markdown( - extensions=[ - PrettyLinksExtension(), - "codehilite", - "fenced_code", - "footnotes", - "nl2br", - "sane_lists", - "tables", - "toc", - ], - ) + _images = re.compile(r"<img alt(?:=\"(.*?)\")? src=\"(.*?)\" (?:title=\"(.*?)\" )?/>") - def init(self, path): + def init(self, path, text): self.path = path + self.text = text + + # Markdown Renderer + self.renderer = markdown.Markdown( + extensions=[ + ImageExtractorExtension(), + PrettyLinksExtension(), + "codehilite", + "fenced_code", + "footnotes", + "nl2br", + "sane_lists", + "tables", + "toc", + ], + ) + + # Render! + self.html = self._render() def _render_link(self, m): url, text = m.groups() @@ -731,16 +717,79 @@ class WikiRenderer(misc.Object): "url" : url, } - def render(self, text): + def _render(self): logging.debug("Rendering %s" % self.path) # Render... - text = self.renderer.convert(text) + text = self.renderer.convert(self.text) # Postprocess links - text = self._links.sub(self._render_link, text) + text = self._links.sub(self._render_link, text) # Postprocess images to <figure>
- text = self.images.sub(self._render_image, text) + text = self._images.sub(self._render_image, text) return text + + @lazy_property + def images(self): + """ + A list of all images that have been part of the rendered markup + """ + images = [] + + for url in self.renderer.images: + # Skip external images + if url.startswith("https://") or url.startswith("http://"): + continue + + # Make the URL absolute + url = self.backend.wiki.make_path(self.path, url) + + images.append(url) + + return images + + +class PrettyLinksExtension(markdown.extensions.Extension): + def extendMarkdown(self, md): + # Create links to Bugzilla + md.preprocessors.register(BugzillaLinksPreprocessor(md), "bugzilla", 10) + + # Create links to CVE + md.preprocessors.register(CVELinksPreprocessor(md), "cve", 10) + + +class BugzillaLinksPreprocessor(markdown.preprocessors.Preprocessor): + regex = re.compile(r"(?:#(\d{5,}))", re.I) + + def run(self, lines): + for line in lines: + yield self.regex.sub(r"[#\1](https://bugzilla.ipfire.org/show_bug.cgi?id=\1)", line) + + +class CVELinksPreprocessor(markdown.preprocessors.Preprocessor): + regex = re.compile(r"(?:CVE)[\s\-](\d{4}\-\d+)") + + def run(self, lines): + for line in lines: + yield self.regex.sub(r"[CVE-\1](https://cve.mitre.org/cgi-bin/cvename.cgi?name=\1)", line) + + +class ImageExtractor(markdown.treeprocessors.Treeprocessor): + """ + Finds all images + """ + def run(self, root): + self.md.images = [] + + # Find all images and store the URLs + for image in root.findall(".//img"): + src = image.get("src") + + self.md.images.append(src) + + +class ImageExtractorExtension(markdown.extensions.Extension): + def extendMarkdown(self, md): + md.treeprocessors.register(ImageExtractor(md), "image-extractor", 10) diff --git a/src/web/docs.py b/src/web/docs.py index 984b1f7c..25fc01c7 100644 --- a/src/web/docs.py +++ b/src/web/docs.py @@ -224,9 +224,9 @@ class RenderHandler(base.BaseHandler): content = self.get_argument("content") # Render the content 
- html = self.backend.wiki.render(path, content) + renderer = self.backend.wiki.render(path, content) - self.finish(html) + self.finish(renderer.html) class RestoreHandler(base.BaseHandler):