From: Jim Hague <jim@sinodun.com>
Date: Wed, 14 Feb 2018 10:15:56 +0000 (+0000)
Subject: eit: combine title and subtitle when scraping title with ' % ', fixes #4873
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e6a01316020037e4d030cc55a10427e1d7f6000b;p=thirdparty%2Ftvheadend.git

eit: combine title and subtitle when scraping title with ' % ', fixes #4873

Currently the title and subtitle are combined by joining them with a single
space. If a scrape regex is meant to modify only the title, that's a problem
because the regex cannot tell where the title ends.

So instead join with ' % '. '%' isn't a regex metacharacter, so it can be
matched literally in the scrape regexes without escaping.

Update scraper test engine and UK regexes to match.
---

diff --git a/data/conf/epggrab/eit/scrape/uk b/data/conf/epggrab/eit/scrape/uk
index de3191ff0..346c065d0 100644
--- a/data/conf/epggrab/eit/scrape/uk
+++ b/data/conf/epggrab/eit/scrape/uk
@@ -23,7 +23,7 @@
       "New[.:] "
   ],
   "scrape_title": [
-      "^(.+)[.]{3}( )[.]{3}([^.?!:]+)(?:([?!])|[:.])"
+      "^(.+)[.]{3}( )% [.]{3}([^.?!:]+)(?:([?!])|[:.])"
   ],
   "scrape_subtitle": [
     {
@@ -53,7 +53,7 @@
   ],
   "pcre": {
     "scrape_title": [
-      "^(.+)[.]{3}( )[.]{3}(.*?)(?:([?!])|[:]|(?<!Dr|Prof|Rev|Mr|Mrs|Ms|[.][^.])[.]) "
+      "^(.+)[.]{3}( )% [.]{3}(.*?)(?:([?!])|[:]|(?<!Dr|Prof|Rev|Mr|Mrs|Ms|[.][^.])[.]) "
     ],
     "scrape_subtitle": [
       {
diff --git a/src/epggrab/module/eit.c b/src/epggrab/module/eit.c
index 6248031be..6e23ce5df 100644
--- a/src/epggrab/module/eit.c
+++ b/src/epggrab/module/eit.c
@@ -557,7 +557,7 @@ _eit_scrape_text(eit_module_t *eit_mod, eit_event_t *ev)
     char title_summary[2048];
     lang_str_t *ls = lang_str_create();
     RB_FOREACH(se, ev->title, link) {
-      snprintf(title_summary, sizeof(title_summary), "%s %s",
+      snprintf(title_summary, sizeof(title_summary), "%s %% %s",
                se->str, lang_str_get(ev->summary, se->lang));
       if (eit_pattern_apply_list(buffer, sizeof(buffer), title_summary, se->lang, &eit_mod->p_scrape_title)) {
         tvhtrace(LS_TBL_EIT, "  scrape title '%s' from '%s' using %s",
diff --git a/support/eitscrape_test.py b/support/eitscrape_test.py
index 1c95d6043..831a0d6b7 100755
--- a/support/eitscrape_test.py
+++ b/support/eitscrape_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Copyright (C) 2017 Tvheadend Foundation CIC
+# Copyright (C) 2017, 2018 Tvheadend Foundation CIC
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -170,7 +170,7 @@ class EITScrapeTest(object):
       canonical, _, _ = key.partition(':')
       text = test['summary']
       if canonical == 'new_title':
-        text = test['title'] + ' ' + text
+        text = test['title'] + ' % ' + text
       if 'language' in test:
         lang = test['language']
       else: