From: Jim Hague Date: Wed, 14 Feb 2018 10:15:56 +0000 (+0000) Subject: eit: combine title and subtitle when scraping title with ' % ', fixes #4873 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e6a01316020037e4d030cc55a10427e1d7f6000b;p=thirdparty%2Ftvheadend.git eit: combine title and subtitle when scraping title with ' % ', fixes #4873 Currently title and subtitle are combined by joining with a single space. If the scrape is attempting to just modify the title, that's a problem because you don't know where the title ends. So instead join with ' % '. % isn't a regex metachar, so this is convenient for use in regexes. Update scraper test engine and UK regexes to match. --- diff --git a/data/conf/epggrab/eit/scrape/uk b/data/conf/epggrab/eit/scrape/uk index de3191ff0..346c065d0 100644 --- a/data/conf/epggrab/eit/scrape/uk +++ b/data/conf/epggrab/eit/scrape/uk @@ -23,7 +23,7 @@ "New[.:] " ], "scrape_title": [ - "^(.+)[.]{3}( )[.]{3}([^.?!:]+)(?:([?!])|[:.])" + "^(.+)[.]{3}( )% [.]{3}([^.?!:]+)(?:([?!])|[:.])" ], "scrape_subtitle": [ { @@ -53,7 +53,7 @@ ], "pcre": { "scrape_title": [ - "^(.+)[.]{3}( )[.]{3}(.*?)(?:([?!])|[:]|(?title, link) { - snprintf(title_summary, sizeof(title_summary), "%s %s", + snprintf(title_summary, sizeof(title_summary), "%s %% %s", se->str, lang_str_get(ev->summary, se->lang)); if (eit_pattern_apply_list(buffer, sizeof(buffer), title_summary, se->lang, &eit_mod->p_scrape_title)) { tvhtrace(LS_TBL_EIT, " scrape title '%s' from '%s' using %s", diff --git a/support/eitscrape_test.py b/support/eitscrape_test.py index 1c95d6043..831a0d6b7 100755 --- a/support/eitscrape_test.py +++ b/support/eitscrape_test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright (C) 2017 Tvheadend Foundation CIC +# Copyright (C) 2017, 2018 Tvheadend Foundation CIC # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -170,7 +170,7 @@ class EITScrapeTest(object): canonical, _, _ = key.partition(':') text = test['summary'] if canonical == 'new_title': - text = test['title'] + ' ' + text + text = test['title'] + ' % ' + text if 'language' in test: lang = test['language'] else: