From: Michael Tremer
Date: Wed, 22 Aug 2018 08:01:37 +0000 (+0100)
Subject: blog: Parse feeds from other blogs
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c70a7c295df4e3a1dbf71e73bc7c20833feded22;p=ipfire.org.git

blog: Parse feeds from other blogs

Signed-off-by: Michael Tremer
---

diff --git a/src/backend/blog.py b/src/backend/blog.py
--- a/src/backend/blog.py
+++ b/src/backend/blog.py
@@ -1,6 +1,9 @@
 #!/usr/bin/python
 
+import feedparser
+import re
 import textile
+import unicodedata
 
 from . import misc
 
@@ -60,6 +63,31 @@ class Blog(misc.Object):
 				ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC \
 			LIMIT %s", query, query, limit)
 
+	def _make_slug(self, s):
+		"""
+			Returns a lower-case, dash-separated slug for s
+			which is not used by any other post, yet
+		"""
+		# Decompose accented characters and drop the combining marks, so
+		# that an accented letter becomes its base letter instead of
+		# being turned into a word separator below
+		try:
+			s = unicodedata.normalize("NFKD", s)
+			s = "".join(c for c in s if not unicodedata.combining(c))
+		except TypeError:
+			pass
+
+		# Collapse all runs of non-word characters into a single space
+		s = re.sub(r"[^\w]+", " ", s)
+
+		slug = "-".join(s.split()).lower()
+
+		# Append dashes until no post with this slug exists
+		while self.db.get("SELECT 1 FROM blog WHERE slug = %s", slug):
+			slug += "-"
+
+		return slug
+
 	def refresh(self):
 		"""
 			Needs to be called after a post has been changed
@@ -76,6 +104,65 @@ class Blog(misc.Object):
 		for row in res:
 			yield row.year
 
+	def update_feeds(self):
+		"""
+			Updates all enabled feeds
+
+			Entries tagged "blog.ipfire.org" are imported as new posts,
+			or updated when the feed carries a newer version of them
+		"""
+		for feed in self.db.query("SELECT * FROM blog_feeds WHERE enabled IS TRUE"):
+			f = feedparser.parse(feed.url)
+
+			with self.db.transaction():
+				# Update name, but keep the old one if the parsed feed
+				# does not come with a title
+				name = f.feed.get("title")
+				if name:
+					self.db.execute("UPDATE blog_feeds SET name = %s \
+						WHERE id = %s", name, feed.id)
+
+				# Tags of the feed which are added to each of its posts
+				feed_tags = feed.tags or []
+
+				# Walk through all entries
+				for entry in f.entries:
+					# Skip everything without the "blog.ipfire.org" tag
+					tags = [t.term for t in entry.get("tags", []) if t.get("term")]
+					if "blog.ipfire.org" not in tags:
+						continue
+
+					# Get link to the posting site
+					link = entry.links[0].href
+
+					# Check if the entry has already been imported
+					res = self.db.get("SELECT id, (updated_at < %s) AS needs_update \
+						FROM blog WHERE feed_id = %s AND foreign_id = %s",
+						entry.updated, feed.id, entry.id)
+					if res:
+						# If the post needs to be updated, we do so
+						if res.needs_update:
+							self.db.execute("UPDATE blog SET title = %s, author = %s, \
+								published_at = %s, updated_at = %s, html = %s, link = %s, \
+								tags = %s WHERE id = %s", entry.title, entry.author,
+								entry.published, entry.updated, entry.summary, link,
+								feed_tags + tags, res.id)
+
+						# Done here
+						continue
+
+					# Insert the new post
+					self.db.execute("INSERT INTO blog(title, slug, author, \
+						published_at, html, link, tags, updated_at, feed_id, foreign_id) \
+						VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
+						entry.title, self._make_slug(entry.title), entry.author,
+						entry.published, entry.summary, link, feed_tags + tags,
+						entry.updated, feed.id, entry.id)
+
+		# Refresh the search index
+		with self.db.transaction():
+			self.refresh()
+
 
 class Post(misc.Object):
 	def init(self, id, data=None):