From: E.Smith <31170571+azlm8t@users.noreply.github.com> Date: Mon, 4 Sep 2017 22:42:56 +0000 (+0100) Subject: eit: Scrape sub-title from summary in OTA EIT. (#4578). X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b0ba8e373da45ab9721e447ca8f093eb6d4164bc;p=thirdparty%2Ftvheadend.git eit: Scrape sub-title from summary in OTA EIT. (#4578). Freeview/Freesat broadcasts frequently have a subtitle as part of the summary. For example, we have "Treehouse of Horror IX: Three scary stories." from which we can deduce the subtitle as "Treehouse of Horror IX". Other variants are "...title_continuation. Subtitle" (where the real title of the program spills over into the summary), and "x/y. Subtitle" where x/y is the episode number. Allow scraping of these forms and use the result as the subtitle. If we cannot scrape a subtitle then we continue the existing practice of using the summary buffer for the subtitle. The subtitle is currently NOT removed from the summary. Issue: #4578 --- diff --git a/data/conf/epggrab/eit/scrape/uk b/data/conf/epggrab/eit/scrape/uk index c37e89661..6f5ad66d8 100644 --- a/data/conf/epggrab/eit/scrape/uk +++ b/data/conf/epggrab/eit/scrape/uk @@ -16,5 +16,10 @@ ], "airdate": [ "\\(([0-9][0-9][0-9][0-9])\\)" + ], + "scrape_subtitle": [ + "^\\.\\.\\.[^:.]*\\[.:] ([^.0-9][^:]*): ", + "^[0-9]+/[0-9]+\\\. 
+([^.0-9][^:]*): ", + "^([^.0-9][^:]+): " ] } diff --git a/src/epggrab/module/eit.c b/src/epggrab/module/eit.c index 1bcf54172..d9c180946 100644 --- a/src/epggrab/module/eit.c +++ b/src/epggrab/module/eit.c @@ -53,6 +53,7 @@ typedef struct eit_module_t eit_pattern_list_t p_snum; eit_pattern_list_t p_enum; eit_pattern_list_t p_airdate; ///< Original air date parser + eit_pattern_list_t p_scrape_subtitle; } eit_module_t; /* ************************************************************************ @@ -651,8 +652,25 @@ static int _eit_process_event_one *save |= epg_episode_set_genre(ee, ev.genre, &changes4); if (ev.parental) *save |= epg_episode_set_age_rating(ee, ev.parental, &changes4); - if (ev.summary) - *save |= epg_episode_set_subtitle(ee, ev.summary, &changes4); + if (ev.summary) { + /* Freeview/Freesat have a subtitle as part of the summary in the format + * "subtitle: desc". So try and extract it and use that. + * If we can't find a subtitle then default to previous behaviour of + * setting the summary as the subtitle. + */ + const char *summary = lang_str_get(ev.summary, ev.default_charset); + char buffer[2048]; + if (eit_pattern_apply_list(buffer, sizeof(buffer), summary, &eit_mod->p_scrape_subtitle)) { + tvhtrace(LS_TBL_EIT, " scrape subtitle '%s' from '%s' using %s on channel '%s'", + buffer, summary, mod->id, + ch ? 
channel_get_name(ch, channel_blank_name) : "(null)"); + lang_str_t *ls = lang_str_create2(buffer, ev.default_charset); + *save |= epg_episode_set_subtitle(ee, ls, &changes4); + lang_str_destroy(ls); + } else { + *save |= epg_episode_set_subtitle(ee, ev.summary, &changes4); + } + } #if TODO_ADD_EXTRA if (ev.extra) *save |= epg_episode_set_extra(ee, extra, &changes4); @@ -1001,6 +1019,7 @@ static int _eit_scrape_load_one ( htsmsg_t *m, eit_module_t* mod ) eit_pattern_compile_list(&mod->p_snum, htsmsg_get_list(m, "season_num")); eit_pattern_compile_list(&mod->p_enum, htsmsg_get_list(m, "episode_num")); eit_pattern_compile_list(&mod->p_airdate, htsmsg_get_list(m, "airdate")); + eit_pattern_compile_list(&mod->p_scrape_subtitle, htsmsg_get_list(m, "scrape_subtitle")); return 1; }