]> git.ipfire.org Git - ipfire.org.git/blame - src/backend/blog.py
search: Use PostgreSQL's websearch_to_tsquery()
[ipfire.org.git] / src / backend / blog.py
CommitLineData
0a6875dc
MT
1#!/usr/bin/python
2
541c952b 3import datetime
c70a7c29 4import feedparser
aee57270 5import html2text
e9c6d581 6import markdown
023cdd01
MT
7import markdown.extensions
8import markdown.preprocessors
c70a7c29 9import re
7e64f6a3 10import textile
c70a7c29 11import unicodedata
7e64f6a3 12
0a6875dc 13from . import misc
a3a850a4 14from .decorators import *
0a6875dc
MT
15
class Blog(misc.Object):
    """
    Access to the blog posts stored in the "blog" table.

    Provides lookups (by id, slug, tag, author, year), full-text search,
    creation of new posts, rendering of post text to HTML and the import
    of posts from the external feeds listed in "blog_feeds".
    """

    def _get_post(self, query, *args):
        """
        Runs the query and wraps the resulting row in a Post object.

        Returns None if the query did not return a row.
        """
        res = self.db.get(query, *args)

        if res:
            return Post(self.backend, res.id, data=res)

        return None

    def _get_posts(self, query, *args):
        """
        Runs the query and lazily yields one Post object per row.
        """
        res = self.db.query(query, *args)

        for row in res:
            yield Post(self.backend, row.id, data=row)

    def get_by_id(self, id):
        """
        Returns the post with the given ID (or None)
        """
        return self._get_post("SELECT * FROM blog \
            WHERE id = %s", id)

    def get_by_slug(self, slug, published=True):
        """
        Returns the post with the given slug (or None).

        With published=True (the default) only posts with a publication
        date that is not in the future are considered.
        """
        if published:
            return self._get_post("SELECT * FROM blog \
                WHERE slug = %s AND published_at <= NOW()", slug)

        return self._get_post("SELECT * FROM blog \
            WHERE slug = %s", slug)

    def get_newest(self, limit=None):
        """
        Yields published posts, newest first.

        limit is passed straight to the SQL LIMIT clause.
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE published_at IS NOT NULL \
                AND published_at <= NOW() \
            ORDER BY published_at DESC LIMIT %s", limit)

    def get_by_tag(self, tag, limit=None):
        """
        Yields published posts that carry the given tag, newest first
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE published_at IS NOT NULL \
                AND published_at <= NOW() \
                AND %s = ANY(tags) \
            ORDER BY published_at DESC LIMIT %s", tag, limit)

    def get_by_author(self, author, limit=None):
        """
        Yields published posts of the given author, newest first.

        A post matches if either its author column equals author.name
        or its author_uid column equals author.uid.
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE (author = %s OR author_uid = %s) \
                AND published_at IS NOT NULL \
                AND published_at <= NOW() \
            ORDER BY published_at DESC LIMIT %s",
            author.name, author.uid, limit)

    def get_by_year(self, year):
        """
        Yields all posts that were published in the given year, newest first
        """
        return self._get_posts("SELECT * FROM blog \
            WHERE EXTRACT(year FROM published_at) = %s \
                AND published_at IS NOT NULL \
                AND published_at <= NOW() \
            ORDER BY published_at DESC", year)

    def get_drafts(self, author=None, limit=None):
        """
        Yields posts that are unpublished or scheduled for the future,
        most recently changed first.

        If author is given, only posts with that author's UID are returned.
        """
        if author:
            return self._get_posts("SELECT * FROM blog \
                WHERE author_uid = %s \
                    AND (published_at IS NULL OR published_at > NOW()) \
                ORDER BY COALESCE(updated_at, created_at) DESC LIMIT %s",
                author.uid, limit)

        return self._get_posts("SELECT * FROM blog \
            WHERE (published_at IS NULL OR published_at > NOW()) \
            ORDER BY COALESCE(updated_at, created_at) DESC LIMIT %s", limit)

    def search(self, query, limit=None):
        """
        Full-text search over blog_search_index.

        Yields the matching posts ordered by rank, best match first.
        """
        return self._get_posts("SELECT blog.* FROM blog \
            LEFT JOIN blog_search_index search_index ON blog.id = search_index.post_id \
            WHERE search_index.document @@ websearch_to_tsquery('english', %s) \
                ORDER BY ts_rank(search_index.document, websearch_to_tsquery('english', %s)) DESC \
            LIMIT %s", query, query, limit)

    def has_had_recent_activity(self, **kwargs):
        """
        Returns True if at least one post has been published within
        the given period of time.

        The keyword arguments are handed to datetime.timedelta(),
        e.g. has_had_recent_activity(days=30).
        """
        t = datetime.timedelta(**kwargs)

        res = self.db.get("SELECT COUNT(*) AS count FROM blog \
            WHERE published_at IS NOT NULL AND published_at BETWEEN NOW() - %s AND NOW()", t)

        return bool(res and res.count > 0)

    def create_post(self, title, text, author, tags=None, lang="markdown"):
        """
        Creates a new post and returns the resulting Post object
        """
        # Avoid a shared mutable default; no tags means an empty list
        tags = [] if tags is None else list(tags)

        # Pre-render HTML
        html = self._render_text(text, lang=lang)

        return self._get_post("INSERT INTO blog(title, slug, text, html, lang, author_uid, tags) \
            VALUES(%s, %s, %s, %s, %s, %s, %s) RETURNING *", title, self._make_slug(title), text,
            html, lang, author.uid, tags)

    def _make_slug(self, s):
        """
        Derives a slug from the given string that is not yet used
        by any other post.
        """
        # Decompose characters into their compatibility form (NFKD).
        # Input that cannot be normalized (TypeError) is used as it is.
        try:
            s = unicodedata.normalize("NFKD", s)
        except TypeError:
            pass

        # Collapse every run of non-word characters into a single space
        s = re.sub(r"[^\w]+", " ", s)

        # Join the words with dashes and lowercase everything
        slug = "-".join(s.split()).lower()

        # Append dashes until no post with this slug exists
        while True:
            e = self.db.get("SELECT 1 FROM blog WHERE slug = %s", slug)
            if not e:
                break

            slug += "-"

        return slug

    def _render_text(self, text, lang="markdown"):
        """
        Renders text to HTML.

        "markdown" and "textile" are supported; text in any other
        language is returned unchanged.
        """
        if lang == "markdown":
            return markdown.markdown(text,
                extensions=[
                    PrettyLinksExtension(),
                    "codehilite",
                    "fenced_code",
                    "footnotes",
                    "nl2br",
                    "sane_lists",
                    "tables",
                    "toc",
                ])

        elif lang == "textile":
            return textile.textile(text)

        return text

    def refresh(self):
        """
        Needs to be called after a post has been changed
        and updates the search index.
        """
        self.db.execute("REFRESH MATERIALIZED VIEW blog_search_index")

    @property
    def years(self):
        """
        Yields all years in which posts have been published, newest first
        """
        res = self.db.query("SELECT DISTINCT EXTRACT(year FROM published_at)::integer AS year \
            FROM blog WHERE published_at IS NOT NULL AND published_at <= NOW() \
            ORDER BY year DESC")

        for row in res:
            yield row.year

    async def announce(self):
        """
        Announces all published posts that have not been announced, yet
        """
        posts = self._get_posts("SELECT * FROM blog \
            WHERE (published_at IS NOT NULL AND published_at <= NOW()) \
                AND announced_at IS NULL")

        for post in posts:
            await post.announce()

    async def update_feeds(self):
        """
        Updates all enabled feeds

        Every enabled feed is fetched and all of its entries that are
        tagged with "blog.ipfire.org" are inserted as new posts, or
        updated if they have been imported before and the feed carries
        a newer version. Finally, the search index is refreshed.

        Errors raised while fetching or parsing a feed are passed on
        to the caller.
        """
        for feed in self.db.query("SELECT * FROM blog_feeds WHERE enabled IS TRUE"):
            f = feedparser.parse(feed.url)

            with self.db.transaction():
                # Update name
                self.db.execute("UPDATE blog_feeds SET name = %s \
                    WHERE id = %s", f.feed.title, feed.id)

                # Walk through all entries
                for entry in f.entries:
                    # Entries without any tags raise AttributeError
                    # and are skipped
                    try:
                        tags = [t.term for t in entry.tags]
                    except AttributeError:
                        continue

                    # Skip everything without the "blog.ipfire.org" tag
                    if "blog.ipfire.org" not in tags:
                        continue

                    # Get link to the posting site
                    link = entry.links[0].href

                    # Check if the entry has already been imported
                    res = self.db.get("SELECT id, (updated_at < %s) AS needs_update \
                        FROM blog WHERE feed_id = %s AND foreign_id = %s",
                        entry.updated, feed.id, entry.id)
                    if res:
                        # If the post needs to be updated, we do so
                        if res.needs_update:
                            self.db.execute("UPDATE blog SET title = %s, author = %s, \
                                published_at = %s, updated_at = %s, html = %s, link = %s, \
                                tags = %s WHERE id = %s", entry.title, entry.author,
                                entry.published, entry.updated, entry.summary, link,
                                feed.tags + tags, res.id)

                        # Done here
                        continue

                    # Insert the new post
                    self.db.execute("INSERT INTO blog(title, slug, author, \
                        published_at, html, link, tags, updated_at, feed_id, foreign_id) \
                        VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                        entry.title, self._make_slug(entry.title), entry.author,
                        entry.published, entry.summary, link, feed.tags + tags,
                        entry.updated, feed.id, entry.id)

                # Mark feed as updated (the ID is passed as a bound
                # parameter like in every other query)
                self.db.execute("UPDATE blog_feeds SET last_updated_at = CURRENT_TIMESTAMP \
                    WHERE id = %s", feed.id)

        # Refresh the search index
        with self.db.transaction():
            self.refresh()
234
0a6875dc
MT
235
class Post(misc.Object):
    """
    A single blog post, backed by one row of the "blog" table.
    """

    def init(self, id, data=None):
        """
        Stores the ID and the database row of this post.

        NOTE(review): presumably called by the constructor of
        misc.Object - confirm there.
        """
        self.id = id
        self.data = data

    # Title

    @property
    def title(self):
        return self.data.title

    @property
    def slug(self):
        return self.data.slug

    @lazy_property
    def author(self):
        """
        The account of the author if the post has an author UID,
        otherwise the content of the author column.
        """
        if self.data.author_uid:
            return self.backend.accounts.get_by_uid(self.data.author_uid)

        return self.data.author

    @property
    def created_at(self):
        return self.data.created_at

    @property
    def lang(self):
        return self.data.lang

    # Published?

    @property
    def published_at(self):
        return self.data.published_at

    def is_published(self):
        """
        Returns True if the post is already published
        """
        # Posts without a publication date are drafts
        if not self.published_at:
            return False

        # NOTE(review): compares against a naive local timestamp - confirm
        # that published_at is delivered without timezone information
        return bool(self.published_at <= datetime.datetime.now())

    def publish(self, when=None):
        """
        Publishes this post at the given time (default: now).

        Does nothing if the post has already been published.
        """
        if self.is_published():
            return

        self.db.execute("UPDATE blog SET published_at = COALESCE(%s, CURRENT_TIMESTAMP) \
            WHERE id = %s", when, self.id)

        # Update search indices
        self.backend.blog.refresh()

    # Updated?

    @property
    def updated_at(self):
        return self.data.updated_at

    # Text

    @property
    def text(self):
        return self.data.text

    # HTML

    @lazy_property
    def html(self):
        """
        Returns this post as rendered HTML
        """
        # Prefer the HTML that is stored with the post and only render
        # the text if there is none
        return self.data.html or self.backend.blog._render_text(self.text, lang=self.lang)

    @lazy_property
    def plaintext(self):
        """
        Returns the HTML of this post converted to plain text without links
        """
        h = html2text.HTML2Text()
        h.ignore_links = True

        return h.handle(self.html)

    # Excerpt

    @property
    def excerpt(self):
        """
        Returns the leading paragraphs of the plain text, up to and
        including the first one that is not a headline and has at
        least 40 characters.
        """
        paragraphs = self.plaintext.split("\n\n")

        excerpt = []

        for paragraph in paragraphs:
            excerpt.append(paragraph)

            # Add another paragraph if we encountered a headline
            if paragraph.startswith("#"):
                continue

            # End if this paragraph was long enough
            if len(paragraph) >= 40:
                break

        return "\n\n".join(excerpt)

    # Tags

    @property
    def tags(self):
        return self.data.tags

    # Link

    @property
    def link(self):
        return self.data.link

    @lazy_property
    def release(self):
        """
        The published release that is linked to this post (if any)
        """
        return self.backend.releases._get_release("SELECT * FROM releases \
            WHERE published IS NOT NULL AND published <= NOW() AND blog_id = %s", self.id)

    def is_editable(self, editor):
        """
        Returns True if editor is allowed to edit this post
        """
        # Authors can edit their own posts
        return self.author == editor

    def update(self, title, text, tags=None):
        """
        Called to update the content of this post
        """
        # Avoid a shared mutable default and make sure that the database
        # and the cached row hold the very same list
        tags = [] if tags is None else list(tags)

        # Update slug when post isn't published yet
        if not self.is_published() and not self.title == title:
            slug = self.backend.blog._make_slug(title)
        else:
            slug = self.slug

        # Render and cache HTML
        html = self.backend.blog._render_text(text, lang=self.lang)

        self.db.execute("UPDATE blog SET title = %s, slug = %s, text = %s, html = %s, \
            tags = %s, updated_at = CURRENT_TIMESTAMP WHERE id = %s",
            title, slug, text, html, tags, self.id)

        # Update cache
        # NOTE(review): values already computed by the lazy properties
        # (html, plaintext) may be stale after this - confirm how
        # lazy_property caches its result
        self.data.update({
            "title" : title,
            "slug"  : slug,
            "text"  : text,
            "html"  : html,
            "tags"  : tags,
        })

        # Update search index if post is published
        if self.is_published():
            self.backend.blog.refresh()

    def delete(self):
        """
        Deletes this post from the database
        """
        self.db.execute("DELETE FROM blog WHERE id = %s", self.id)

        # Update search indices
        self.backend.blog.refresh()

    async def announce(self):
        """
        Sends an announcement of this post to every member of the
        "promotional-consent" group and marks the post as announced.

        Does nothing if the group does not exist.
        """
        # Get people who should receive this message
        group = self.backend.groups.get_by_gid("promotional-consent")
        if not group:
            return

        with self.db.transaction():
            # Generate an email for everybody in this group
            for account in group:
                self.backend.messages.send_template("blog/messages/announcement",
                    account=account, post=self)

            # Mark this post as announced
            self.db.execute("UPDATE blog SET announced_at = CURRENT_TIMESTAMP \
                WHERE id = %s", self.id)
407
023cdd01
MT
408
class PrettyLinksExtension(markdown.extensions.Extension):
    """
    Markdown extension that registers the preprocessors which turn
    Bugzilla and CVE references into links.
    """

    def extendMarkdown(self, md):
        """
        Registers both link preprocessors with priority 10
        """
        preprocessors = (
            ("bugzilla", BugzillaLinksPreprocessor),
            ("cve", CVELinksPreprocessor),
        )

        for name, preprocessor in preprocessors:
            md.preprocessors.register(preprocessor(md), name, 10)
413
414
class BugzillaLinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
    Turns bug references like "#12345" into Markdown links to Bugzilla
    """

    # A hash sign followed by at least five digits; the digits are captured.
    # No case-insensitive flag is needed as the pattern has no letters.
    regex = re.compile(r"(?:#(\d{5,}))")

    def run(self, lines):
        """
        Returns a new list of lines in which every bug reference
        has been replaced by a link.

        A list (instead of a generator) is returned, because that is
        what Markdown expects from a preprocessor.
        """
        return [
            self.regex.sub(r"[#\1](https://bugzilla.ipfire.org/show_bug.cgi?id=\1)", line)
            for line in lines
        ]
421
422
class CVELinksPreprocessor(markdown.preprocessors.Preprocessor):
    """
    Turns CVE references like "CVE-2021-1234" or "CVE 2021-1234"
    into Markdown links to cve.mitre.org
    """

    # "CVE", one whitespace character or dash, then <four digits>-<digits>;
    # only the number is captured
    regex = re.compile(r"(?:CVE)[\s\-](\d{4}\-\d+)")

    def run(self, lines):
        """
        Returns a new list of lines in which every CVE reference
        has been replaced by a link.

        A list (instead of a generator) is returned, because that is
        what Markdown expects from a preprocessor.
        """
        return [
            self.regex.sub(r"[CVE-\1](https://cve.mitre.org/cgi-bin/cvename.cgi?name=\1)", line)
            for line in lines
        ]