From: E.Smith <31170571+azlm8t@users.noreply.github.com> Date: Wed, 22 Nov 2017 11:30:17 +0000 (+0000) Subject: eit: Add regex scraper for detecting new programmes in OTA. (#4594). X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=704d028a50050815f12d50a344ef77483fd6d80c;p=thirdparty%2Ftvheadend.git eit: Add regex scraper for detecting new programmes in OTA. (#4594). Issue: #4594. --- diff --git a/data/conf/epggrab/eit/scrape/uk b/data/conf/epggrab/eit/scrape/uk index 67e729bcf..f7b383db0 100644 --- a/data/conf/epggrab/eit/scrape/uk +++ b/data/conf/epggrab/eit/scrape/uk @@ -21,5 +21,8 @@ "^[.][.][.][^:.]*[.:] ([^.0-9][^:]*): ", "^[0-9]+/[0-9]+[.] +([^:]*): ", "^([^.0-9][^:]+): " + ], + "is_new" : [ + "^(New: )" ] } diff --git a/src/epggrab/module/eit.c b/src/epggrab/module/eit.c index 093560d0a..f261f56e2 100644 --- a/src/epggrab/module/eit.c +++ b/src/epggrab/module/eit.c @@ -55,6 +55,7 @@ typedef struct eit_module_t eit_pattern_list_t p_enum; eit_pattern_list_t p_airdate; ///< Original air date parser eit_pattern_list_t p_scrape_subtitle;///< Scrape subtitle from summary data + eit_pattern_list_t p_is_new; ///< Is programme new to air } eit_module_t; /* ************************************************************************ @@ -435,13 +436,16 @@ static int _eit_desc_crid * @param text - string from broadcaster to search. * @param eit_mod - our module with regex to use. * @param en - [out] episode data + * @param copyright_year [out] - copyright year + * @param is_new [out] - is episode new * @return Bitmask of changed fields. */ static uint32_t _eit_scrape_episode(const char *str, eit_module_t *eit_mod, epg_episode_num_t *en, - uint16_t *copyright_year) + uint16_t *copyright_year, + uint8_t *is_new) { if (!str) return 0; @@ -472,6 +476,13 @@ _eit_scrape_episode(const char *str, } } } + + /* Extract is_new flag. 
Any match is assumed to mean "new" */ + if (eit_pattern_apply_list(buffer, sizeof(buffer), str, &eit_mod->p_is_new)) { + *is_new = 1; + changed |= EPG_CHANGED_IS_NEW; + } + return changed; } @@ -652,6 +663,7 @@ static int _eit_process_event_one time_t first_aired = 0; uint32_t scraped = 0; uint16_t copyright_year = 0; + uint8_t is_new = 0; /* We search across all the main fields using the same regex and * merge the results with the last match taking precendence. So if @@ -661,19 +673,21 @@ static int _eit_process_event_one if (eit_mod->scrape_episode) { if (ev.title) scraped |= _eit_scrape_episode(lang_str_get(ev.title, ev.default_charset), - eit_mod, &en, &copyright_year); + eit_mod, &en, &copyright_year, &is_new); if (ev.desc) scraped |= _eit_scrape_episode(lang_str_get(ev.desc, ev.default_charset), - eit_mod, &en, &copyright_year); + eit_mod, &en, &copyright_year, &is_new); if (ev.summary) scraped |= _eit_scrape_episode(lang_str_get(ev.summary, ev.default_charset), - eit_mod, &en, &copyright_year); + eit_mod, &en, &copyright_year, &is_new); } /* Update Episode */ if (ee) { *save |= epg_broadcast_set_episode(ebc, ee, &changes2); + if (scraped & EPG_CHANGED_IS_NEW) + *save |= epg_broadcast_set_is_new(ebc, is_new, &changes2); *save |= epg_episode_set_is_bw(ee, ev.bw, &changes4); if (ev.title) *save |= epg_episode_set_title(ee, ev.title, &changes4); @@ -1057,6 +1071,7 @@ static void _eit_scrape_clear(eit_module_t *mod) eit_pattern_free_list(&mod->p_enum); eit_pattern_free_list(&mod->p_airdate); eit_pattern_free_list(&mod->p_scrape_subtitle); + eit_pattern_free_list(&mod->p_is_new); } static int _eit_scrape_load_one ( htsmsg_t *m, eit_module_t* mod ) @@ -1065,6 +1080,7 @@ static int _eit_scrape_load_one ( htsmsg_t *m, eit_module_t* mod ) eit_pattern_compile_list(&mod->p_snum, htsmsg_get_list(m, "season_num")); eit_pattern_compile_list(&mod->p_enum, htsmsg_get_list(m, "episode_num")); eit_pattern_compile_list(&mod->p_airdate, htsmsg_get_list(m, "airdate")); + 
eit_pattern_compile_list(&mod->p_is_new, htsmsg_get_list(m, "is_new")); } if (mod->scrape_subtitle) {