git.ipfire.org Git - ipfire.org.git/commitdiff
blog: Parse feeds from other blogs
author Michael Tremer <michael.tremer@ipfire.org>
Wed, 22 Aug 2018 08:01:37 +0000 (09:01 +0100)
committer Michael Tremer <michael.tremer@ipfire.org>
Wed, 22 Aug 2018 08:01:37 +0000 (09:01 +0100)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/backend/blog.py

index 346e3184052a913ed29060f9117890d1aeafcd69..6032c00d78484f10e064e081fc6cf36102698c6f 100644 (file)
@@ -1,6 +1,9 @@
 #!/usr/bin/python
 
+import feedparser
+import re
 import textile
+import unicodedata
 
 from . import misc
 
@@ -60,6 +63,27 @@ class Blog(misc.Object):
                                ORDER BY ts_rank(search_index.document, to_tsquery('english', %s)) DESC \
                        LIMIT %s", query, query, limit)
 
+       def _make_slug(self, s):  # Build a slug from s that no row in the blog table uses yet
+               # Decompose characters into their NFKD form (nothing is stripped here)
+               try:
+                       s = unicodedata.normalize("NFKD", s)
+               except TypeError:  # normalize() rejected the argument type: keep s unchanged
+                       pass
+
+               # Replace every run of non-word characters (anything but \w) with one space
+               s = re.sub(r"[^\w]+", " ", s)
+
+               slug = "-".join(s.split()).lower()  # words joined by "-", lowercased
+
+               while True:  # append "-" until the lookup finds no post with this slug
+                       e = self.db.get("SELECT 1 FROM blog WHERE slug = %s", slug)
+                       if not e:
+                               break
+
+                       slug += "-"
+
+               return slug
+
        def refresh(self):
                """
                        Needs to be called after a post has been changed
@@ -76,6 +100,63 @@ class Blog(misc.Object):
                for row in res:
                        yield row.year
 
+       def update_feeds(self):
+               """
+                       Updates all enabled feeds
+               """
+               for feed in self.db.query("SELECT * FROM blog_feeds WHERE enabled IS TRUE"):
+                       try:
+                               f = feedparser.parse(feed.url)
+                       except Exception as e:
+                               raise e  # NOTE(review): re-raises as-is, so this try/except changes nothing; an error here ends the whole run
+
+                       with self.db.transaction():  # one transaction per feed
+                               # Store the parsed feed's title as the feed name
+                               self.db.execute("UPDATE blog_feeds SET name = %s \
+                                       WHERE id = %s", f.feed.title, feed.id)
+
+                               # Walk through all entries
+                               for entry in f.entries:
+                                       # Skip everything without the "blog.ipfire.org" tag
+                                       try:
+                                               tags = list((t.term for t in entry.tags))
+
+                                               if not "blog.ipfire.org" in tags:
+                                                       continue
+                                       except AttributeError:  # entry.tags (or a tag's .term) is missing: skip the entry too
+                                               continue
+
+                                       # Get link to the posting site (the first of the entry's links)
+                                       link = entry.links[0].href
+
+                                       # Check if this feed's entry was already imported and whether the stored copy is older than entry.updated
+                                       res = self.db.get("SELECT id, (updated_at < %s) AS needs_update \
+                                                       FROM blog WHERE feed_id = %s AND foreign_id = %s",
+                                                       entry.updated, feed.id, entry.id)
+                                       if res:
+                                               # If the post needs to be updated, we do so
+                                               if res.needs_update:
+                                                       self.db.execute("UPDATE blog SET title = %s, author = %s, \
+                                                               published_at = %s, updated_at = %s, html = %s, link = %s, \
+                                                               tags = %s WHERE id = %s", entry.title, entry.author,
+                                                               entry.published, entry.updated, entry.summary, link,
+                                                               feed.tags + tags, res.id)
+
+                                               # Done here: an already imported entry is never inserted again
+                                               continue
+
+                                       # Insert the new post; the slug is derived from the title and the feed's tags are prepended to the entry's
+                                       self.db.execute("INSERT INTO blog(title, slug, author, \
+                                               published_at, html, link, tags, updated_at, feed_id, foreign_id) \
+                                               VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
+                                               entry.title, self._make_slug(entry.title), entry.author,
+                                               entry.published, entry.summary, link, feed.tags + tags,
+                                               entry.updated, feed.id, entry.id)
+
+               # Refresh the search index
+               with self.db.transaction():
+                       self.refresh()  # runs once, after all feeds have been processed
+
 
 class Post(misc.Object):
        def init(self, id, data=None):