From: Jim Hague Date: Wed, 13 Dec 2017 21:37:00 +0000 (+0000) Subject: eit: Add optional 2nd match subexpression for subtitle (#4791) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4b97d6942f8c84b3f95b4bb3097e11327d900f07;p=thirdparty%2Ftvheadend.git eit: Add optional 2nd match subexpression for subtitle (#4791) If the regex for the subtitle contains a second subexpression, and a match is made, use the first subexpression for the subtitle and replace the summary with the second subexpression. For example, a UK Freeview subtitle regex might choose, when matching a summary 'Subtitle: Text', to set the subtitle to 'Subtitle' and set the summary to 'Text' to avoid repetition of the subtitle. Update the scraper test script to support a test field 'new_summary'. As the 'uk' scraper does not include any second subexpressions, do not update the test data for now. Issue: #4791 --- diff --git a/src/epggrab/module/eit.c b/src/epggrab/module/eit.c index f261f56e2..cdd3d4da6 100644 --- a/src/epggrab/module/eit.c +++ b/src/epggrab/module/eit.c @@ -703,13 +703,21 @@ static int _eit_process_event_one */ const char *summary = lang_str_get(ev.summary, ev.default_charset); char buffer[2048]; - if (eit_pattern_apply_list(buffer, sizeof(buffer), summary, &eit_mod->p_scrape_subtitle)) { + char buffer2[2048]; + char *bufs[2] = { buffer, buffer2 }; + size_t sizes[2] = { sizeof(buffer), sizeof(buffer2) }; + if (eit_pattern_apply_list_2(bufs, sizes, summary, &eit_mod->p_scrape_subtitle)) { tvhtrace(LS_TBL_EIT, " scrape subtitle '%s' from '%s' using %s on channel '%s'", buffer, summary, mod->id, ch ? 
channel_get_name(ch, channel_blank_name) : "(null)"); lang_str_t *ls = lang_str_create2(buffer, ev.default_charset); *save |= epg_episode_set_subtitle(ee, ls, &changes4); lang_str_destroy(ls); + if (bufs[1]) { + ls = lang_str_create2(buffer2, ev.default_charset); + *save |= epg_broadcast_set_summary(ebc, ls, &changes2); + lang_str_destroy(ls); + } } else { /* No subtitle found in summary buffer. */ *save |= epg_episode_set_subtitle(ee, ev.summary, &changes4); diff --git a/src/epggrab/module/eitpatternlist.c b/src/epggrab/module/eitpatternlist.c index 76d87b59f..21d8baa2e 100644 --- a/src/epggrab/module/eitpatternlist.c +++ b/src/epggrab/module/eitpatternlist.c @@ -16,6 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <assert.h> #include <ctype.h> #include "tvheadend.h" #include "eitpatternlist.h" @@ -47,23 +48,45 @@ void eit_pattern_compile_list ( eit_pattern_list_t *list, htsmsg_t *l ) void *eit_pattern_apply_list(char *buf, size_t size_buf, const char *text, eit_pattern_list_t *l) { - regmatch_t match[2]; + char *b[2] = { buf, NULL }; + size_t s[2] = { size_buf, 0 }; + return eit_pattern_apply_list_2(b, s, text, l); +} + +void *eit_pattern_apply_list_2(char *buf[2], size_t size_buf[2], const char *text, eit_pattern_list_t *l) +{ + regmatch_t match[3]; eit_pattern_t *p; ssize_t size; + assert(buf[0]); + assert(text); + if (!l) return NULL; /* search and report the first match */ TAILQ_FOREACH(p, l, p_links) - if (!regexec(&p->compiled, text, 2, match, 0) && match[1].rm_so != -1) { - size = MIN(match[1].rm_eo - match[1].rm_so, size_buf - 1); + if (!regexec(&p->compiled, text, 3, match, 0) && match[1].rm_so != -1) { + size = MIN(match[1].rm_eo - match[1].rm_so, size_buf[0] - 1); if (size > 0) { while (isspace(text[match[1].rm_so + size - 1])) size--; - memcpy(buf, text + match[1].rm_so, size); + memcpy(buf[0], text + match[1].rm_so, size); + } + buf[0][size] = '\0'; + if (match[2].rm_so != -1 && buf[1]) { + size = MIN(match[2].rm_eo - match[2].rm_so, size_buf[1] - 1); + if 
(size > 0) { + while (isspace(text[match[2].rm_so + size - 1])) + size--; + memcpy(buf[1], text + match[2].rm_so, size); + } + buf[1][size] = '\0'; + tvhtrace(LS_EPGGRAB," pattern \"%s\" matches with '%s' & '%s'", p->text, buf[0], buf[1]); + } else { + buf[1] = NULL; + tvhtrace(LS_EPGGRAB," pattern \"%s\" matches with '%s'", p->text, buf[0]); } - buf[size] = '\0'; - tvhtrace(LS_EPGGRAB," pattern \"%s\" matches with '%s'", p->text, buf); - return buf; + return buf[0]; } return NULL; } diff --git a/src/epggrab/module/eitpatternlist.h b/src/epggrab/module/eitpatternlist.h index a2e863d44..2f43ecadf 100644 --- a/src/epggrab/module/eitpatternlist.h +++ b/src/epggrab/module/eitpatternlist.h @@ -38,5 +38,11 @@ void eit_pattern_compile_list ( eit_pattern_list_t *list, htsmsg_t *l ); * Return the buf or NULL if no match. */ void *eit_pattern_apply_list(char *buf, size_t size_buf, const char *text, eit_pattern_list_t *l); +/* As eit_pattern_apply_list(), but return up to 2 matches. + * buf[0] & size_buf[0] are the first match, buf[1] & size_buf[1] the second. + * If no second match is found, set buf[1] to NULL. + * Return the first buf or NULL if no match. 
+ */ +void *eit_pattern_apply_list_2(char *buf[2], size_t size_buf[2], const char *text, eit_pattern_list_t *l); void eit_pattern_free_list ( eit_pattern_list_t *l ); #endif diff --git a/support/eitscrape_test.py b/support/eitscrape_test.py index af60da705..7c68e2511 100755 --- a/support/eitscrape_test.py +++ b/support/eitscrape_test.py @@ -59,14 +59,14 @@ class EITScrapeTest(object): self.num_failed = 0; self.num_ok = 0; - def run_test_case_i(self, text, reg, expect, testing): + def run_test_case_i(self, text, reg, expect, testing, match=1): """Run a test case for text using the regular expression lists in reg, expecting the result of a match to be expect while running a test case for the string testing.""" for iter in reg: m = iter.search(text) if (m is not None): - result = m.group(1) + result = m.group(match) if result == expect: print 'OK: Got correct result of "%s" testing "%s" for "%s" using "%s"' % (result, testing, text, iter.pattern) self.num_ok = self.num_ok + 1 @@ -99,7 +99,7 @@ class EITScrapeTest(object): if key in ('age', 'genre'): print 'Test case contains key "%s" which is not currently tested for "%s"' % (key, test) - if key not in ('age', 'airdate', 'comment', 'episode', 'genre', 'new_subtitle', 'season', 'summary'): + if key not in ('age', 'airdate', 'comment', 'episode', 'genre', 'new_subtitle', 'new_summary', 'season', 'summary'): print 'Test case contains invalid key "%s" (possible typo) for "%s"' % (key, test) raise SyntaxWarning('Test case contains invalid/unknown key "%s" (possible typo) for "%s"' % (key, test)) @@ -116,6 +116,8 @@ class EITScrapeTest(object): self.run_test_case_i(text, airdate_reg, test['airdate'], "airdate") if test.has_key('new_subtitle'): self.run_test_case_i(text, subtitle_reg, test['new_subtitle'], "new_subtitle") + if test.has_key('new_summary'): + self.run_test_case_i(text, subtitle_reg, test['new_summary'], "new_summary", match=2)