]> git.ipfire.org Git - thirdparty/tvheadend.git/commitdiff
Enhancements to XMLTV Parsing
authorDeltaMikeCharlie <127641886+DeltaMikeCharlie@users.noreply.github.com>
Sat, 18 Oct 2025 23:48:28 +0000 (10:48 +1100)
committerFlole <Flole998@users.noreply.github.com>
Sun, 26 Oct 2025 14:49:01 +0000 (15:49 +0100)
docs/class/epggrabber_modules.md
src/api/api_epg.c
src/epg.c
src/epg.h
src/epggrab.h
src/epggrab/module/xmltv.c
src/htsmsg_xml.c
src/htsmsg_xml.h
src/webui/static/img/doc/channel/grabber_xpath_fields.png [new file with mode: 0755]

index e4ca12d12595ac9b5ce20b3e789bb61f0d66baab..19c5adc94cc9d175a3688f8b79f26aa0adfcfb57 100644 (file)
@@ -64,3 +64,97 @@ means broadcast information such as summary information will still be
 retrieved.
 
 ---
+
+### XMLTV XPath Examples and Notes
+
+Although XMLTV is a standard, some providers of XMLTV data include additional information.
+XPath-like expressions can be used to extract some of this additional information
+for EPG grabbers that use XMLTV as a data source.
+
+!['EPG Grabber XPath'](static/img/doc/channel/grabber_xpath_fields.png)
+
+##Category Code
+
+Some information providers include free form category descriptions
+that are not compliant with the DVB EIT standard.
+
+In the following example, 'Cricket' is not a standard DVB EIT category.
+However, '0x40' is the standard code for 'Sport' and the provider has
+added this code to allow the standard code to be used when needed.
+
+```
+<programme start="yyyymmddHHMMSS +0000" stop="yyyymmddHHMMSS +0000" channel="2">
+    <category lang="en" eit="0x40">Cricket</category>
+</programme>
+```
+
+To extract this attribute for use in TVH, we should add `@eit` to the
+'Category Code XPath' field.  This will extract the hexadecimal code
+'0x40' and convert that to the standard category code 'Sport'.
+
+For the purposes of the category code, the root node is considered to be the
+standard `category` node within `programme`.
+
+##Unique Event Identifier
+
+By default, XMLTV does not provide a mechanism for uniquely identifying each event.
+
+In the following example, an XMLTV provider has added the non-standard `uniqueID`
+attribute to the `programme` node.
+```
+<programme uniqueID="1234" start="yyyymmddHHMMSS +0000" stop="yyyymmddHHMMSS +0000" channel="2">
+</programme>
+```
+To extract this attribute for use in TVH, we should add `@uniqueID` to the
+'Unique Event ID XPath' field.  This will assign '1234' as the unique
+identifier for this EPG event and will allow future updates matching
+this ID to be applied.
+
+For the purposes of the unique ID, the root node is considered to be `programme`.
+
+##SeriesLink and EpisodeLink
+
+A CRID (Content Reference IDentifier) is a mechanism used by broadcasters
+to identify events from the same series and multiple occurrences
+of the same episode in a series.  TVH refers to these as 'SeriesLink'
+and 'EpisodeLink'.  These fields can be used for recording a whole series
+or detecting a repeated episode.
+
+In the following example, the provider has added the non-standard `crid` node to the XMLTV data.
+This has been further broken down to include a `series` node and an `episode` node.
+
+```
+<programme uniqueID="1234" start="yyyymmddHHMMSS +0000" stop="yyyymmddHHMMSS +0000" channel="2">
+      <crid>
+            <series>crid://provider/abcde</series>
+            <episode>crid://provider/abcde_98765</episode>
+      </crid>
+</programme>
+```
+To extract these values, we should add `//crid/series/text()` and `//crid/episode/text()`
+to the 'SeriesLink XPath' and 'EpisodeLink XPath' fields respectively.
+
+For the purposes of the SeriesLink and EpisodeLink, the root node is
+considered to be `programme`.
+
+##SeriesLink and EpisodeLink Fallbacks
+
+If the XPath expression does not match any data and the relevant fallback option
+is enabled, TVH will use its standard process for creating the 'SeriesLink' or
+'EpisodeLink' value; otherwise, the field will be left empty.
+
+##Notes
+
+TVH can only interpret the following subset of XPath identifier syntax:
+
+/ = Node
+
+@ = Attribute
+
+[] = Condition
+
+text() = Node text
+
+**Example:** //node1/node2[attrX=value]/@attrY
+
+---
\ No newline at end of file
index 35718cc274182e27b1b1d2c862230945d9d0da41..d82956d94d09fa4fd10bf0b89405b54e13ef16ca 100644 (file)
@@ -99,6 +99,12 @@ api_epg_entry ( epg_broadcast_t *eb, const char *lang, const access_t *perm, con
 
   /* EPG IDs */
   htsmsg_add_u32(m, "eventId", eb->id);
+
+  if(eb->xmltv_eid)  //This is the optional external reference provided by XMLTV.
+  {
+    htsmsg_add_str(m, "eventId_xmltv", eb->xmltv_eid);
+  }
+
   if (eb->episodelink && strncasecmp(eb->episodelink->uri, "tvh://", 6))
     htsmsg_add_str(m, "episodeUri", eb->episodelink->uri);
   if (eb->serieslink)
index c8f160233fa3909ba979ec2d13a09135314efea2..588b8753c1d84880c38585083046298655cdf7c6 100644 (file)
--- a/src/epg.c
+++ b/src/epg.c
@@ -79,6 +79,22 @@ static int _ebc_start_cmp ( const void *a, const void *b )
   return ((epg_broadcast_t*)a)->start - ((epg_broadcast_t*)b)->start;
 }
 
+/*
+ * Order two broadcasts by their XMLTV unique event identifier.
+ *
+ * A broadcast without an identifier (NULL xmltv_eid) sorts before any
+ * broadcast that has one, and two broadcasts without identifiers compare
+ * equal, so swapping the arguments always negates the result.  When both
+ * identifiers are present they are compared with strcmp().
+ */
+static int _ebc_xmltv_cmp ( const void *a, const void *b )
+{
+  const char *eid_a = ((const epg_broadcast_t*)a)->xmltv_eid;
+  const char *eid_b = ((const epg_broadcast_t*)b)->xmltv_eid;
+
+  //Entries without an XMLTV ID are passed in here, so never hand NULL to strcmp().
+  if (!eid_a || !eid_b)
+    return (eid_a != NULL) - (eid_b != NULL);
+
+  return strcmp(eid_a, eid_b);
+}
+
 void epg_updated ( void )
 {
   epg_object_t *eo;
@@ -579,11 +595,25 @@ static epg_broadcast_t *_epg_channel_add_broadcast
 
   /* Find (only) */
   if ( !create ) {
-    return RB_FIND(&ch->ch_epg_schedule, *bcast, sched_link, _ebc_start_cmp);
+    if((*bcast)->xmltv_eid)
+    {
+      return RB_FIND(&ch->ch_epg_schedule, *bcast, sched_link, _ebc_xmltv_cmp);
+    }
+    else
+    {
+      return RB_FIND(&ch->ch_epg_schedule, *bcast, sched_link, _ebc_start_cmp);
+    }
 
   /* Find/Create */
   } else {
-    ret = RB_INSERT_SORTED(&ch->ch_epg_schedule, *bcast, sched_link, _ebc_start_cmp);
+    if((*bcast)->xmltv_eid)
+    {
+      ret = RB_INSERT_SORTED(&ch->ch_epg_schedule, *bcast, sched_link, _ebc_xmltv_cmp);
+    }
+    else
+    {
+      ret = RB_INSERT_SORTED(&ch->ch_epg_schedule, *bcast, sched_link, _ebc_start_cmp);
+    }
 
     /* New */
     if (!ret) {
@@ -697,7 +727,7 @@ static epg_broadcast_t *_epg_channel_add_broadcast
   if (timer) _epg_channel_timer_callback(ch);
   if (ret->ops->putref(ret)) return NULL;
   return ret;
-}
+}// END _epg_channel_add_broadcast
 
 void epg_channel_unlink ( channel_t *ch )
 {
@@ -936,6 +966,40 @@ static epg_broadcast_t **_epg_broadcast_skel ( void )
   return &skel;
 }
 
+/*
+ * Look up a broadcast on 'channel' using the XMLTV unique event ID.
+ *
+ * The skeleton returned by _epg_broadcast_skel() is loaded with the
+ * start/stop times and the ID, then handed to _epg_channel_add_broadcast()
+ * together with 'src', 'create', 'save' and 'changed'.
+ *
+ * Returns NULL when an argument is missing, when the time range is empty
+ * or already over, or when the ID could not be stored in the skeleton;
+ * otherwise returns whatever _epg_channel_add_broadcast() returns.
+ */
+epg_broadcast_t *epg_broadcast_find_by_xmltv_eid
+  ( channel_t *channel, epggrab_module_t *src,
+    time_t start, time_t stop, int create,
+    int *save, epg_changes_t *changed, const char *xmltv_eid)
+{
+  epg_broadcast_t **skel;
+  int             set_ok;
+
+  /* Reject missing arguments and ranges that are empty or already finished. */
+  if (!channel || !start || !stop || !xmltv_eid) return NULL;
+  if (stop <= start || stop <= gclk()) return NULL;
+
+  skel = _epg_broadcast_skel();
+  (*skel)->start = start;
+  (*skel)->stop  = stop;
+
+  /* Clear whatever ID the skeleton currently holds before assigning the new one. */
+  free((*skel)->xmltv_eid);
+  (*skel)->xmltv_eid = NULL;
+
+  set_ok = epg_broadcast_set_xmltv_eid(*skel, xmltv_eid, changed);
+
+  /* Without an ID in the skeleton there is nothing to search for. */
+  if (set_ok == 0) {
+    tvherror(LS_EPG, "Unable to set '%s' result '%d'", xmltv_eid, set_ok);
+    return NULL;
+  }
+
+  return _epg_channel_add_broadcast(channel, skel, src, create, save, changed);
+}
+
 epg_broadcast_t *epg_broadcast_find_by_time
   ( channel_t *channel, epggrab_module_t *src,
     time_t start, time_t stop, int create, int *save, epg_changes_t *changed )
@@ -948,6 +1012,7 @@ epg_broadcast_t *epg_broadcast_find_by_time
   ebc = _epg_broadcast_skel();
   (*ebc)->start   = start;
   (*ebc)->stop    = stop;
+  /* The shared skeleton may still own an ID string set by an XMLTV-ID
+   * lookup; release it instead of just dropping the pointer. */
+  free((*ebc)->xmltv_eid);
+  (*ebc)->xmltv_eid = NULL;
 
   return _epg_channel_add_broadcast(channel, ebc, src, create, save, changed);
 }
@@ -963,7 +1028,10 @@ int epg_broadcast_change_finish
   if (!(changes & EPG_CHANGED_EPISODE))
     save |= epg_broadcast_set_episodelink_uri(broadcast, NULL, NULL);
   if (!(changes & EPG_CHANGED_DVB_EID))
-    save |= epg_broadcast_set_dvb_eid(broadcast, 0, NULL);
+    {
+      save |= epg_broadcast_set_dvb_eid(broadcast, 0, NULL);
+      save |= epg_broadcast_set_xmltv_eid(broadcast, NULL, NULL);
+    }
   if (!(changes & EPG_CHANGED_IS_WIDESCREEN))
     save |= epg_broadcast_set_is_widescreen(broadcast, 0, NULL);
   if (!(changes & EPG_CHANGED_IS_HD))
@@ -1041,6 +1109,7 @@ epg_broadcast_t *epg_broadcast_clone
                                    1, save, &changes);
   if (ebc) {
     /* Copy metadata */
+    *save |= epg_broadcast_set_xmltv_eid(ebc, src->xmltv_eid, &changes);
     *save |= epg_broadcast_set_is_widescreen(ebc, src->is_widescreen, &changes);
     *save |= epg_broadcast_set_is_hd(ebc, src->is_hd, &changes);
     *save |= epg_broadcast_set_is_bw(ebc, src->is_bw, &changes);
@@ -1143,6 +1212,17 @@ int epg_broadcast_set_dvb_eid
                              changed, EPG_CHANGED_DVB_EID);
 }
 
+/*
+ * Store 'xmltv_eid' as the XMLTV unique event ID of broadcast 'b'.
+ *
+ * Returns 0 when 'b' is NULL, otherwise the result of
+ * _epg_object_set_str().  The change is reported through 'changed' under
+ * the EPG_CHANGED_DVB_EID flag, the same flag the DVB event ID setter uses.
+ */
+int epg_broadcast_set_xmltv_eid
+  ( epg_broadcast_t *b, const char *xmltv_eid, epg_changes_t *changed )
+{
+  if (b == NULL)
+    return 0;
+
+  return _epg_object_set_str(b, &b->xmltv_eid, xmltv_eid,
+                             changed, EPG_CHANGED_DVB_EID);
+}
+
 int epg_broadcast_set_is_widescreen
   ( epg_broadcast_t *b, uint8_t ws, epg_changes_t *changed )
 {
@@ -1559,6 +1639,8 @@ htsmsg_t *epg_broadcast_serialize ( epg_broadcast_t *broadcast )
     htsmsg_add_str(m, "ch", channel_get_uuid(broadcast->channel, ubuf));
   if (broadcast->dvb_eid)
     htsmsg_add_u32(m, "eid", broadcast->dvb_eid);
+  if (broadcast->xmltv_eid)
+    htsmsg_add_str(m, "xeid", broadcast->xmltv_eid);
   if (broadcast->is_widescreen)
     htsmsg_add_u32(m, "is_wd", 1);
   if (broadcast->is_hd)
@@ -1663,6 +1745,8 @@ epg_broadcast_t *epg_broadcast_deserialize
   /* Get metadata */
   if (!htsmsg_get_u32(m, "eid", &eid))
     *save |= epg_broadcast_set_dvb_eid(ebc, eid, &changes);
+  if ((str = htsmsg_get_str(m, "xeid")))
+    *save |= epg_broadcast_set_xmltv_eid(ebc, str, &changes);
   if (!htsmsg_get_u32(m, "is_wd", &u32))
     *save |= epg_broadcast_set_is_widescreen(ebc, u32, &changes);
   if (!htsmsg_get_u32(m, "is_hd", &u32))
index da595fd397034497fd020ba951d1d5aab0823935..930ffd9110c297c68e31b17663d342f801064149 100644 (file)
--- a/src/epg.h
+++ b/src/epg.h
@@ -310,6 +310,7 @@ struct epg_broadcast
                                                ///< We'll call it copyright_year since words like "complete" and "finished"
                                                ///< sound too similar to dvr recorded functionality. We'll only store the
                                                ///< year since we only get year not month and day.
+  char                       *xmltv_eid;       ///< XMLTV (or other) unique event identifier
 };
 
 /* Lookup */
@@ -318,6 +319,10 @@ epg_broadcast_t *epg_broadcast_find_by_time
     time_t start, time_t stop, int create, int *save, epg_changes_t *changes );
 epg_broadcast_t *epg_broadcast_find_by_eid ( struct channel *ch, uint16_t eid );
 epg_broadcast_t *epg_broadcast_find_by_id  ( uint32_t id );
+epg_broadcast_t *epg_broadcast_find_by_xmltv_eid
+  ( struct channel *ch, struct epggrab_module *src,
+    time_t start, time_t stop, int create,
+    int *save, epg_changes_t *changed, const char* xmltv_eid);
 
 /* Post-modify */
 int epg_broadcast_change_finish( epg_broadcast_t *b, epg_changes_t changed, int merge )
@@ -331,6 +336,9 @@ epg_broadcast_t *epg_broadcast_clone
 int epg_broadcast_set_dvb_eid
   ( epg_broadcast_t *b, uint16_t dvb_eid, epg_changes_t *changed )
   __attribute__((warn_unused_result));
+int epg_broadcast_set_xmltv_eid
+  ( epg_broadcast_t *b, const char *xmltv_eid, epg_changes_t *changed )
+  __attribute__((warn_unused_result));
 int epg_broadcast_set_running
   ( epg_broadcast_t *b, epg_running_t running )
   __attribute__((warn_unused_result));
index 590df45dfbb4e37ffbb41cf75b9a9afbe884967b..974bfd6151f60bafddf0dd17473e19a611816b97 100644 (file)
@@ -200,6 +200,13 @@ struct epggrab_module_int
     ///< and extra details on to programme description for viewing by legacy clients.
   int                           xmltv_use_category_not_genre; ///< Use category tags and don't map to DVB genres.
 
+  const char                   *xmltv_xpath_category_code; ///< XPath string for extracting a category ETSI code.
+  const char                   *xmltv_xpath_unique_id;     ///< XPath string for extracting a unique event ID.
+  const char                   *xmltv_xpath_series_link;   ///< XPath string for extracting a series link.
+  const char                   *xmltv_xpath_episode_link;  ///< XPath string for extracting an episode link.
+  int                           xmltv_xpath_series_use_standard; ///< If the XPath node is not found, use the standard TVH routine.
+  int                           xmltv_xpath_episode_use_standard; ///< If the XPath node is not found, use the standard TVH routine.
+
   /* Handle data */
   char*     (*grab)   ( void *mod );
   htsmsg_t* (*trans)  ( void *mod, char *data );
index fe4b4e14e073d5a1c13272f4404c7f61bc80179f..ef437180d4be65e1d34bd0ddd5ac25cb0ee5d307 100644 (file)
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Notes - DMC April 2024.
+ *
+ *  The XMLTV data received is first converted to a htsmsg format.
+ *  Various tags and attributes are then extracted from the htsmsg
+ *  and saved as EPG data.
+ *
+ *  PLEASE NOTE: TVHeadEnd only processes a subset of the XMLTV schema,
+ *               plus a non-standard tag <summary>.
  */
 
 #include <assert.h>
 #define XMLTV_FIND "tv_find_grabbers"
 #define XMLTV_GRAB "tv_grab_"
 
+/*
+ * Global variables for XPaths
+ */
+htsmsg_t                            *xmltv_xpath_category_code = NULL;
+htsmsg_t                            *xmltv_xpath_unique = NULL;
+htsmsg_t                            *xmltv_xpath_series = NULL;
+htsmsg_t                            *xmltv_xpath_episode = NULL;
+int                                 xmltv_xpath_series_fallback = 0;
+int                                 xmltv_xpath_episode_fallback = 0;
+
 /* **************************************************************************
  * Parsing
  * *************************************************************************/
@@ -533,6 +552,15 @@ static int _xmltv_parse_age_rating
 
 /*
  * Parse category list
+ * <category lang="en" code="0xaf">Leisure hobbies</category>
+ * <category lang="en" code="0x45">Cricket</category>
+ * NOTE:
+ * TVH seems to refer to the ETSI code as the 'genre' and to the
+ * text description as the 'category'.
+ * There is no ETSI code for 'Cricket', the closest is 0x45 'Team Sports'.
+ * In the above example, the genre is saved as 0x45, however, if scraping
+ * for 'extra information' is enabled, the text 'Cricket' will be added to
+ * the 'category' list.
  */
 static epg_genre_list_t
 *_xmltv_parse_categories ( htsmsg_t *tags )
@@ -540,10 +568,60 @@ static epg_genre_list_t
   htsmsg_t *e;
   htsmsg_field_t *f;
   epg_genre_list_t *egl = NULL;
+  const char *cat_name;
+  uint8_t cat_val;
+  int cat_flag = 0;
+  const char *cat_etsi;
+
   HTSMSG_FOREACH(f, tags) {
     if (!strcmp(htsmsg_field_name(f), "category") && (e = htsmsg_get_map_by_field(f))) {
-      if (!egl) egl = calloc(1, sizeof(epg_genre_list_t));
-      epg_genre_list_add_by_str(egl, htsmsg_get_str(e, "cdata"), NULL);
+
+      cat_name = htsmsg_get_str(e, "cdata");
+
+      cat_etsi = NULL;
+      //If we have an XPath expression to search
+      if(xmltv_xpath_category_code)
+      {
+        cat_etsi = htsmsg_xml_xpath_search(e, xmltv_xpath_category_code);
+
+        cat_flag = 0;
+
+        //If we got a category code, use that instead of the text.
+        //https://www.etsi.org/deliver/etsi_en/300400_300499/300468/01.17.01_20/en_300468v011701a.pdf
+        //Table 29
+        if(cat_etsi && (strlen(cat_etsi) > 2))
+        {
+          tvhdebug(LS_XMLTV, "Identified XPath Category Code: '%s'", cat_etsi);
+          cat_val = 0;
+          if(cat_etsi[0] == '0' && (cat_etsi[1] == 'x' || cat_etsi[1] == 'X'))  //If the code starts with '0x', look for HEX values.
+          {
+            sscanf(cat_etsi+2, "%hhx", &cat_val);
+
+            if(cat_val != 0)
+            {
+              tvhdebug(LS_XMLTV, "XPath category code '%s' recognised as ETSI '0x%02x'.", cat_etsi, cat_val);
+              if (!egl) egl = calloc(1, sizeof(epg_genre_list_t));
+              cat_flag = epg_genre_list_add_by_eit (egl, cat_val);
+            }
+            else
+            {
+              tvhdebug(LS_XMLTV, "XPath category code '%s' failed.  Invalid hex.", cat_etsi);
+              cat_flag = 0;
+            }
+          }
+        }//END we have a category code
+        else
+        {
+          //cat_etsi is NULL here whenever the XPath matched nothing, and cat_name
+          //is NULL when the node has no text: never pass NULL to '%s'.
+          tvhdebug(LS_XMLTV, "XPath category code '%s' unusable, matching text '%s' instead.", cat_etsi ? cat_etsi : "(null)", cat_name ? cat_name : "(null)");
+        }
+      }//END we have a category XPath
+
+      //If a hex value was not found or is invalid, use the text value instead.
+      if(!cat_flag)
+      {
+        if (!egl) egl = calloc(1, sizeof(epg_genre_list_t));
+        epg_genre_list_add_by_str(egl, cat_name, NULL);
+      }
     }
   }
   return egl;
@@ -717,7 +795,7 @@ static int _xmltv_parse_programme_tags
   const int use_category_not_genre = ((epggrab_module_int_t *)mod)->xmltv_use_category_not_genre;
   int save = 0;
   epg_changes_t changes = 0;
-  epg_broadcast_t *ebc;
+  epg_broadcast_t *ebc = NULL;
   epg_genre_list_t *egl;
   epg_episode_num_t epnum;
   epg_set_t *set;
@@ -729,6 +807,10 @@ static int _xmltv_parse_programme_tags
   time_t first_aired = 0;
   int8_t bw = -1;
 
+  const char  *temp_unique = htsmsg_get_str(tags, "@@UNIQUE");
+  const char  *temp_series = htsmsg_get_str(tags, "@@SERIES");
+  const char  *temp_episode = htsmsg_get_str(tags, "@@EPISODE");
+
   if (epg_channel_ignore_broadcast(ch, start))
     return 0;
 
@@ -737,13 +819,55 @@ static int _xmltv_parse_programme_tags
   /*
    * Broadcast
    */
-  ebc = epg_broadcast_find_by_time(ch, mod, start, stop, 1, &save, &changes);
+
+  //If we got a unique XPath field, try to match an existing event based on that,
+  //if not, use the normal match based on start time only.
+  if(temp_unique)
+  {
+    tvhtrace(LS_XMLTV, "Searching for EPG event using XPath unique ID '%s'.", temp_unique);
+    ebc = epg_broadcast_find_by_xmltv_eid(ch, mod, start, stop, 1, &save, &changes, temp_unique);
+    //NULL will be returned if there is no match found.
+    if(ebc)
+    {
+      tvhtrace(LS_XMLTV, "Matched ID '%s' start '%"PRItime_t"/%"PRItime_t"' stop '%"PRItime_t"/%"PRItime_t"'.", temp_unique, ebc->start, start, ebc->stop, stop);
+      ebc->start = start;
+      ebc->stop = stop;
+    }
+    else
+    {
+      tvhtrace(LS_XMLTV, "No match for EPG event using XPath unique ID '%s'.", temp_unique);
+    }
+  }
+  
+  //If the broadcast event is still null, then either there was no XMLTV unique ID
+  //or there was, but it failed to match.  The latter is an edge case when this feature
+  //has been newly enabled with existing events already present.  They will not match
+  //until they expire.
+  if(!ebc)
+  {
+    tvhtrace(LS_XMLTV, "Searching for EPG event using start/stop.");
+    ebc = epg_broadcast_find_by_time(ch, mod, start, stop, 1, &save, &changes);
+    if(ebc){
+      tvhtrace(LS_XMLTV, "Matched EPG event using start/stop.");
+    }
+    else
+    {
+      tvhtrace(LS_XMLTV, "No match for EPG event using start/stop.");
+    }
+  }
+
   if (!ebc)
     return 0;
   stats->broadcasts.total++;
   if (save && (changes & EPG_CHANGED_CREATE))
     stats->broadcasts.created++;
 
+  /* Save the unique ID string */
+  if(temp_unique)
+  {
+    save |= epg_broadcast_set_xmltv_eid(ebc, temp_unique, &changes);
+  }
+  
   /* Description/summary (wait for episode first) */
   _xmltv_parse_lang_str(&desc, tags, "desc");
   _xmltv_parse_lang_str(&summary, tags, "summary");
@@ -810,6 +934,53 @@ static int _xmltv_parse_programme_tags
    */
   get_episode_info(mod, tags, &uri, &suri, &epnum);
 
+  if(temp_series)
+  {
+    if(suri)
+    {
+      free(suri);
+    }
+    suri = strdup(temp_series);
+  }
+  else
+  {
+    //If there was an XPath for series, but nothing was found
+    //AND we are NOT falling back to the standard method,
+    //then erase the crid that TVH manufactured from the module/series/episode.
+    if(xmltv_xpath_series && !xmltv_xpath_series_fallback)
+    {
+      if(suri)
+      {
+        free(suri);
+        suri = NULL;
+      }
+    }
+
+  }
+
+  if(temp_episode)
+  {
+    if(uri)
+    {
+      free(uri);
+    }
+    uri = strdup(temp_episode);
+  }
+  else
+  {
+    //If there was an XPath for episode, but nothing was found
+    //AND we are NOT falling back to the standard method,
+    //then erase the crid that TVH manufactured from the module/series/episode.
+    if(xmltv_xpath_episode && !xmltv_xpath_episode_fallback)
+    {
+      if(uri)
+      {
+        free(uri);
+        uri = NULL;
+      }
+    }
+
+  }
   /*
    * Series Link
    */
@@ -832,6 +1003,10 @@ static int _xmltv_parse_programme_tags
   if (uri) {
     set = ebc->episodelink;
     save |= epg_broadcast_set_episodelink_uri(ebc, uri, &changes);
+    //DMC 28-Mar-2024.
+    //This free() was added because compared to the series link above
+    //it looked like not having it would lead to a memory leak.
+    free(uri);
     stats->episodes.total++;
     if (changes & EPG_CHANGED_EPISODE) {
       if (set == NULL)
@@ -916,6 +1091,7 @@ static int _xmltv_parse_programme
     stats->channels.modified++;
   }
   if (!LIST_FIRST(&ec->channels)) return 0;
+
   if((s       = htsmsg_get_str(attribs, "start"))   == NULL) return 0;
   start = _xmltv_str2time(s);
   if((s       = htsmsg_get_str(attribs, "stop"))    == NULL) return 0;
@@ -925,6 +1101,47 @@ static int _xmltv_parse_programme
      (attribs = htsmsg_get_map(subtag,  "attrib")) != NULL)
     icon = htsmsg_get_str(attribs, "src");
 
+  const char  *temp_unique;
+  const char  *temp_series;
+  const char  *temp_episode;
+
+  //NOTE - DMC April 2024
+  //The XPath values need to be searched for here, before the rest of the processing,
+  //because the attributes of the root <programme> node are not available past
+  //this point.  Only sub-nodes of <programme> are passed on to the next function.
+  //If XPath values are found here, add them to the htsmsg using special '@@'
+  //field names which can then be passed to the next function for further processing.
+
+  //Search the current programme for XPath matches
+  if(xmltv_xpath_unique)
+  {
+    temp_unique = htsmsg_xml_xpath_search(body, xmltv_xpath_unique);
+    //If an XPath ID has been found, stash it in htsmsg so that it can
+    //be retrieved by the next function.
+    if(temp_unique)
+    {
+      htsmsg_add_str(tags, "@@UNIQUE", temp_unique);
+    }
+  }//END stash the XPath unique ID
+
+  if(xmltv_xpath_series)
+  {
+    temp_series = htsmsg_xml_xpath_search(body, xmltv_xpath_series);
+    if(temp_series)
+    {
+      htsmsg_add_str(tags, "@@SERIES", temp_series);
+    }
+  }
+
+  if(xmltv_xpath_episode)
+  {
+    temp_episode = htsmsg_xml_xpath_search(body, xmltv_xpath_episode);
+    if(temp_episode)
+    {
+      htsmsg_add_str(tags, "@@EPISODE", temp_episode);
+    }
+  }
+
   if(stop <= start || stop <= gclk()) return 0;
 
   ec->laststamp = gclk();
@@ -1028,7 +1245,16 @@ static int _xmltv_parse_channel
 }
 
 /**
- *
+ *<tv>
+ *  <channel>
+ *     ...channel data
+ *  </channel>
+ *  ...multiple channels
+ *  <programme>
+ *     ...programme data
+ *  </programme>
+ *  ...multiple programmes
+ *</tv>
  */
 static int _xmltv_parse_tv
   (epggrab_module_t *mod, htsmsg_t *body, epggrab_stats_t *stats)
@@ -1040,6 +1266,82 @@ static int _xmltv_parse_tv
   if((tags = htsmsg_get_map(body, "tags")) == NULL)
     return 0;
 
+  //Pre-process the XPaths
+  //Only done once per XMLTV session.
+  if(((epggrab_module_int_t *)mod)->xmltv_xpath_category_code)
+  {
+    tvhtrace(LS_XMLTV, "Parsing Category Code XPath: '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_category_code);
+    xmltv_xpath_category_code = htsmsg_xml_parse_xpath(((epggrab_module_int_t *)mod)->xmltv_xpath_category_code);
+
+    if(htsmsg_is_empty(xmltv_xpath_category_code))
+    {
+      tvhtrace(LS_XMLTV, "Failed to parse Category Code XPath '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_category_code);
+    }
+  }
+  else
+  {
+    tvhtrace(LS_XMLTV, "Category Code XPath not found.");
+  }
+
+  if(((epggrab_module_int_t *)mod)->xmltv_xpath_unique_id)
+  {
+    tvhtrace(LS_XMLTV, "Parsing Unique ID XPath: '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_unique_id);
+    xmltv_xpath_unique = htsmsg_xml_parse_xpath(((epggrab_module_int_t *)mod)->xmltv_xpath_unique_id);
+
+    if(htsmsg_is_empty(xmltv_xpath_unique))
+    {
+      tvhtrace(LS_XMLTV, "Failed to parse Unique ID XPath '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_unique_id);
+    }
+  }
+  else
+  {
+    tvhtrace(LS_XMLTV, "Unique ID XPath not found.");
+  }
+
+  if(((epggrab_module_int_t *)mod)->xmltv_xpath_series_link)
+  {
+    tvhtrace(LS_XMLTV, "Parsing SeriesLink XPath: '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_series_link);
+    xmltv_xpath_series = htsmsg_xml_parse_xpath(((epggrab_module_int_t *)mod)->xmltv_xpath_series_link);
+
+    if(htsmsg_is_empty(xmltv_xpath_series))
+    {
+      tvhtrace(LS_XMLTV, "Failed to parse SeriesLink XPath '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_series_link);
+    }
+  }
+  else
+  {
+    tvhtrace(LS_XMLTV, "SeriesLink XPath not found.");
+  }
+
+  if(((epggrab_module_int_t *)mod)->xmltv_xpath_episode_link)
+  {
+    tvhtrace(LS_XMLTV, "Parsing EpisodeLink XPath: '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_episode_link);
+    xmltv_xpath_episode = htsmsg_xml_parse_xpath(((epggrab_module_int_t *)mod)->xmltv_xpath_episode_link);
+
+    if(htsmsg_is_empty(xmltv_xpath_episode))
+    {
+      tvhtrace(LS_XMLTV, "Failed to parse EpisodeLink XPath '%s'.", ((epggrab_module_int_t *)mod)->xmltv_xpath_episode_link);
+    }
+  }
+  else
+  {
+    tvhtrace(LS_XMLTV, "EpisodeLink XPath not found.");
+  }
+
+  //Set the fallback flags.
+  xmltv_xpath_series_fallback = 0;
+  if(((epggrab_module_int_t *)mod)->xmltv_xpath_series_use_standard)
+  {
+    xmltv_xpath_series_fallback = 1;
+  }
+
+  xmltv_xpath_episode_fallback = 0;
+  if(((epggrab_module_int_t *)mod)->xmltv_xpath_episode_use_standard)
+  {
+    xmltv_xpath_episode_fallback = 1;
+  }
+  //Finished pre-processing the XPath stuff.
+
   tvh_mutex_lock(&global_lock);
   epggrab_channel_begin_scan(mod);
   tvh_mutex_unlock(&global_lock);
@@ -1063,6 +1365,23 @@ static int _xmltv_parse_tv
   epggrab_channel_end_scan(mod);
   tvh_mutex_unlock(&global_lock);
 
+  //If XPaths were used, release the parsed paths.
+  //These pointers are file-level globals that are only reassigned when the
+  //module has the matching XPath configured, so each one is reset to NULL
+  //after being destroyed.  Otherwise a later run without that XPath would
+  //search with, and then destroy again, an already released message.
+  if(xmltv_xpath_unique)
+  {
+    htsmsg_destroy(xmltv_xpath_unique);
+    xmltv_xpath_unique = NULL;
+  }
+  if(xmltv_xpath_series)
+  {
+    htsmsg_destroy(xmltv_xpath_series);
+    xmltv_xpath_series = NULL;
+  }
+  if(xmltv_xpath_episode)
+  {
+    htsmsg_destroy(xmltv_xpath_episode);
+    xmltv_xpath_episode = NULL;
+  }
+  if(xmltv_xpath_category_code)
+  {
+    htsmsg_destroy(xmltv_xpath_category_code);
+    xmltv_xpath_category_code = NULL;
+  }
   return gsave;
 }
 
@@ -1122,6 +1441,42 @@ static int _xmltv_parse
      "If this option is not ticked then we continue to map " \
      "xmltv categories to genres and supply both to clients.")
 
+#define XPATH_CATEGORY_CODE N_("Category Code XPath")
+#define XPATH_CATEGORY_CODE_DESC \
+  N_("The XPath-like expression used to extract the category "\
+     "ETSI code from the XMLTV data. Root node = 'category'.")
+
+#define XPATH_UNIQUE_ID_NAME N_("Unique Event ID XPath")
+#define XPATH_UNIQUE_ID_DESC \
+  N_("The XPath-like expression used to extract a unique event "\
+     "identifier from the XMLTV data.  This ID is used to "\
+     "match existing EPG events so that they can be updated " \
+     "rather than replaced. Root node = 'programme'.")
+
+#define XPATH_SERIES_LINK_NAME N_("SeriesLink XPath")
+#define XPATH_SERIES_LINK_DESC \
+  N_("The XPath-like expression used to extract a SeriesLink "\
+     "identifier from the XMLTV data.  This ID is used "\
+     "to identify multiple occurrences of the same series. "\
+     " Root node = 'programme'.")
+
+#define XPATH_EPISODE_LINK_NAME N_("EpisodeLink XPath")
+#define XPATH_EPISODE_LINK_DESC \
+  N_("The XPath-like expression used to extract an EpisodeLink "\
+     "identifier from the XMLTV data.  This ID is used "\
+     "to identify multiple occurrences of the same episode. "\
+     " Root node = 'programme'.")
+
+#define XPATH_SERIES_USE_STANDARD_NAME N_("SeriesLink XPath fallback")
+#define XPATH_SERIES_USE_STANDARD_DESC \
+  N_("If a SeriesLink XPath is not found, use the standard TVH "\
+     "method for creating a SeriesLink.")
+
+#define XPATH_EPISODE_USE_STANDARD_NAME N_("EpisodeLink XPath fallback")
+#define XPATH_EPISODE_USE_STANDARD_DESC \
+  N_("If an EpisodeLink XPath is not found, use the standard TVH "\
+     "method for creating an EpisodeLink.")
+
 static htsmsg_t *
 xmltv_dn_chnum_list ( void *o, const char *lang )
 {
@@ -1137,6 +1492,17 @@ const idclass_t epggrab_mod_int_xmltv_class = {
   .ic_super      = &epggrab_mod_int_class,
   .ic_class      = "epggrab_mod_int_xmltv",
   .ic_caption    = N_("EPG - Internal XMLTV EPG Grabber"),
+  .ic_groups     = (const property_group_t[]) {
+      {
+         .name   = N_("General Settings"),
+         .number = 1,
+      },
+      {
+         .name   = N_("XPath Settings"),
+         .number = 2,
+      },
+    {}
+  },
   .ic_properties = (const property_t[]){
     {
       .type   = PT_INT,
@@ -1172,6 +1538,54 @@ const idclass_t epggrab_mod_int_xmltv_class = {
       .off    = offsetof(epggrab_module_int_t, xmltv_use_category_not_genre),
       .group  = 1
     },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_category_code",
+      .name   = XPATH_CATEGORY_CODE,
+      .desc   = XPATH_CATEGORY_CODE_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_category_code),
+      .group  = 2
+    },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_unique",
+      .name   = XPATH_UNIQUE_ID_NAME,
+      .desc   = XPATH_UNIQUE_ID_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_unique_id),
+      .group  = 2
+    },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_serieslink",
+      .name   = XPATH_SERIES_LINK_NAME,
+      .desc   = XPATH_SERIES_LINK_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_series_link),
+      .group  = 2
+    },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_episodelink",
+      .name   = XPATH_EPISODE_LINK_NAME,
+      .desc   = XPATH_EPISODE_LINK_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_episode_link),
+      .group  = 2
+    },
+    {
+      .type   = PT_BOOL,
+      .id     = "xpath_series_use_standard",
+      .name   = XPATH_SERIES_USE_STANDARD_NAME,
+      .desc   = XPATH_SERIES_USE_STANDARD_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_series_use_standard),
+      .group  = 2
+    },
+    {
+      .type   = PT_BOOL,
+      .id     = "xpath_episode_use_standard",
+      .name   = XPATH_EPISODE_USE_STANDARD_NAME,
+      .desc   = XPATH_EPISODE_USE_STANDARD_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_episode_use_standard),
+      .group  = 2
+    },
     {}
   }
 };
@@ -1180,6 +1594,17 @@ const idclass_t epggrab_mod_ext_xmltv_class = {
   .ic_super      = &epggrab_mod_ext_class,
   .ic_class      = "epggrab_mod_ext_xmltv",
   .ic_caption    = N_("EPG - External XMLTV EPG Grabber"),
+  .ic_groups     = (const property_group_t[]) {
+      {
+         .name   = N_("General Settings"),
+         .number = 1,
+      },
+      {
+         .name   = N_("XPath Settings"),
+         .number = 2,
+      },
+    {}
+  },
   .ic_properties = (const property_t[]){
     {
       .type   = PT_BOOL,
@@ -1214,6 +1639,54 @@ const idclass_t epggrab_mod_ext_xmltv_class = {
       .off    = offsetof(epggrab_module_int_t, xmltv_use_category_not_genre),
       .group  = 1
     },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_category_code",
+      .name   = XPATH_CATEGORY_CODE,
+      .desc   = XPATH_CATEGORY_CODE_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_category_code),
+      .group  = 2
+    },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_unique",
+      .name   = XPATH_UNIQUE_ID_NAME,
+      .desc   = XPATH_UNIQUE_ID_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_unique_id),
+      .group  = 2
+    },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_serieslink",
+      .name   = XPATH_SERIES_LINK_NAME,
+      .desc   = XPATH_SERIES_LINK_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_series_link),
+      .group  = 2
+    },
+    {
+      .type   = PT_STR,
+      .id     = "xpath_episodelink",
+      .name   = XPATH_EPISODE_LINK_NAME,
+      .desc   = XPATH_EPISODE_LINK_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_episode_link),
+      .group  = 2
+    },
+    {
+      .type   = PT_BOOL,
+      .id     = "xpath_series_use_standard",
+      .name   = XPATH_SERIES_USE_STANDARD_NAME,
+      .desc   = XPATH_SERIES_USE_STANDARD_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_series_use_standard),
+      .group  = 2
+    },
+    {
+      .type   = PT_BOOL,
+      .id     = "xpath_episode_use_standard",
+      .name   = XPATH_EPISODE_USE_STANDARD_NAME,
+      .desc   = XPATH_EPISODE_USE_STANDARD_DESC,
+      .off    = offsetof(epggrab_module_int_t, xmltv_xpath_episode_use_standard),
+      .group  = 2
+    },
     {}
   }
 };
index 4123e74d65c54ee45fd65d954692539f73531a8b..62ff1f3844920950c35422d1dc6578bd5d6a775c 100644 (file)
@@ -25,7 +25,7 @@
  * htsmsg's with UTF-8 encoded payloads
  *
  *  Supports:                             Example:
- *  
+ *
  *  Comments                              <!--  a comment               -->
  *  Processing Instructions               <?xml                          ?>
  *  CDATA                                 <![CDATA[  <litteraly copied> ]]>
@@ -96,7 +96,7 @@ typedef struct cdata_content {
   char cc_buf[0];
 } cdata_content_t;
 
-static char *htsmsg_xml_parse_cd(xmlparser_t *xp, 
+static char *htsmsg_xml_parse_cd(xmlparser_t *xp,
                                 htsmsg_t *parent, char *src);
 
 /**
@@ -245,7 +245,7 @@ htsmsg_xml_parse_attrib
   while(is_xmlws(*src))
     src++;
 
-  
+
   /* Parse attribute payload */
   quote = *src++;
   if(quote != '"' && quote != '\'') {
@@ -274,7 +274,7 @@ htsmsg_xml_parse_attrib
   while(is_xmlws(*src))
     src++;
 
-  if(xmlns_scope_list != NULL && 
+  if(xmlns_scope_list != NULL &&
      attriblen > 6 && !memcmp(attribname, "xmlns:", 6)) {
 
     attribname += 6;
@@ -383,7 +383,7 @@ htsmsg_xml_parse_tag(xmlparser_t *xp, htsmsg_t *parent, char *src)
     if(tagname[i] == ':') {
 
       LIST_FOREACH(ns, &xp->xp_namespaces, xmlns_global_link) {
-       if(ns->xmlns_prefix_len == i && 
+       if(ns->xmlns_prefix_len == i &&
           !memcmp(ns->xmlns_prefix, tagname, ns->xmlns_prefix_len)) {
 
          int llen = taglen - i - 1;
@@ -608,7 +608,7 @@ htsmsg_xml_parse_cd0
       src = htsmsg_xml_parse_tag(xp, tags, src);
       continue;
     }
-    
+
     if(*src == '&' && !raw) {
       if(cc != NULL)
        cc->cc_end = src;
@@ -664,7 +664,7 @@ htsmsg_xml_parse_cd(xmlparser_t *xp, htsmsg_t *parent, char *src)
   int c = 0, l, y = 0;
   char *x, *body;
   htsmsg_t *tags = htsmsg_create_map();
-  
+
   TAILQ_INIT(&ccq);
   src = htsmsg_xml_parse_cd0(xp, &ccq, tags, NULL, src, 0);
 
@@ -697,7 +697,7 @@ htsmsg_xml_parse_cd(xmlparser_t *xp, htsmsg_t *parent, char *src)
 
     assert(cc != NULL);
     assert(TAILQ_NEXT(cc, cc_link) == NULL);
-    
+
     f = htsmsg_field_add(parent, "cdata", HMF_STR, 0, 0);
     f->hmf_str = cc->cc_start;
     *cc->cc_end = 0;
@@ -721,7 +721,7 @@ htsmsg_xml_parse_cd(xmlparser_t *xp, htsmsg_t *parent, char *src)
          c += put_utf8(body + c, *x);
        break;
       }
-      
+
       TAILQ_REMOVE(&ccq, cc, cc_link);
       free(cc);
     }
@@ -767,7 +767,7 @@ htsmsg_parse_prolog(xmlparser_t *xp, char *src)
 
     while(is_xmlws(*src))
       src++;
-    
+
     if(!strncmp(src, "<?", 2)) {
       src += 2;
       src = htsmsg_xml_parse_pi(xp, pis, src);
@@ -849,7 +849,7 @@ htsmsg_xml_deserialize(char *src, char *errbuf, size_t errbufsize)
  err:
   free(src0);
   snprintf(errbuf, errbufsize, "%s", xp.xp_errmsg);
-  
+
   /* Remove any odd chars inside of errmsg */
   for ( ; *errbuf; errbuf++)
     if (*errbuf < ' ')
@@ -900,3 +900,235 @@ htsmsg_xml_get_attr_u32(htsmsg_t *tag, const char *name, uint32_t *ret)
   if (attr) return htsmsg_get_u32(attr, name, ret);
   return HTSMSG_ERR_FIELD_NOT_FOUND;
 }
+
+/**
+ * Take an XPath-like string and return a htsmsg object
+ * containing the node path and attributes.
+ * Currently only supports:
+ *    /      = node
+ *    @      = attribute
+ *    []     = condition, written as [@attr=value]
+ *    text() = text of the previously matched node
+ *
+ *    //node1/node2[@attrX=value]/@attrY
+ *
+ * Each parsed item is added to the returned map as a sub-map with:
+ *    "n" = item name (for a condition: the attribute name)
+ *    "t" = item type (1 = node, 2 = attribute, 3 = condition, 4 = text())
+ *    "v" = condition value (empty string unless the item is a condition)
+ * The key of each sub-map is the decimal input position at which the
+ * item was completed, so keys are unique within one XPath.
+ *
+ * Known limitations of the scanner:
+ *    - text before the first '/', '@' or '[' and text following a ']'
+ *      (up to the next delimiter) is silently dropped;
+ *    - a '/' inside a condition still starts a new node item;
+ *    - a condition value is read up to the first whitespace and any
+ *      quote characters are kept as part of the value.
+ *
+ * Returns a newly created map (possibly empty), or NULL if 'xpath' is
+ * NULL or longer than the internal item buffer allows.
+ */
+htsmsg_t *
+htsmsg_xml_parse_xpath(const char *xpath)
+{
+  //Item types, these are the values stored in the "t" field
+  enum { XP_NONE = 0, XP_NODE = 1, XP_ATTRIB = 2, XP_COND = 3, XP_TEXT = 4 };
+
+  htsmsg_t *m = NULL;              //The whole message
+  htsmsg_t *f = NULL;              //Individual item within the message
+
+  size_t  xpLen = 0;               //Length of the xpath string
+  int     xpType = XP_NONE;        //Type of the current xpath character
+  int     xpTypeSaved = XP_NONE;   //Type of the current xpath item
+
+  int     inPos = 0;               //Current xpath character position
+
+  int     outPos = 0;              //Current output character position
+  char    outStr[128];             //Output string holding the current item
+  int64_t outType = XP_NONE;       //The current item type
+
+  char    condAtt[128];            //The attribute part of the condition item
+  char    condVal[128];            //The value part of the condition item
+
+  char    inPosStr[11];            //Input position as a string for a unique key to the returned htsmsg
+
+  //Nothing to parse; also keeps NULL away from strlen() and '%s'
+  if (xpath == NULL)
+  {
+    return NULL;
+  }
+
+  tvhdebug(LS_XMLTV, "Parsing '%s'", xpath);
+
+  xpLen = strlen(xpath);
+
+  //If the XPath string is too long, abort.
+  //This is checked before anything is allocated so that the early
+  //return cannot leak, and it guarantees that no single item can
+  //overflow outStr.
+  if (xpLen > sizeof(outStr) - 1)
+  {
+    //Formatting note:
+    //sizeof() yields a 'size_t' whose underlying type differs between
+    //32 bit and 64 bit builds, '%zu' is the portable C99 conversion.
+    tvhtrace(LS_XMLTV, "XPath = '%s' too long, max len = %zu.", xpath, (sizeof(outStr) - 1));
+    return NULL;
+  }
+
+  m = htsmsg_create_map();
+  memset(outStr, 0, sizeof(outStr));
+
+  for(inPos = 0; inPos < (int)xpLen; inPos++)  //Loop through the xpath string
+  {
+    //Classify the current character
+    xpType = XP_NONE; //Keep byte
+    if(xpath[inPos] == '/')
+    {
+      xpType = XP_NODE;
+    }
+    else if (xpath[inPos] == '@' && xpTypeSaved != XP_COND)
+    {
+      //Inside a condition the '@' belongs to the condition text
+      xpType = XP_ATTRIB;
+    }
+    else if (xpath[inPos] == '[')
+    {
+      xpType = XP_COND;
+    }
+
+    if (xpType == XP_NONE && xpath[inPos] != ']')
+    {
+      //Add this byte to the existing item
+      outStr[outPos] = xpath[inPos];
+      outPos++;
+      outStr[outPos] = 0;
+      outType = xpTypeSaved;
+    }
+    else
+    {
+      //A delimiter starts a new item type (a ']' resets it to none)
+      xpTypeSaved = xpType;
+    }
+
+    //Flush the collected item at a delimiter or at the end of the input
+    if(inPos == ((int)xpLen - 1) || (xpType != XP_NONE && outPos != 0))
+    {
+
+      if(outType != XP_NONE)
+      {
+
+        if(outType == XP_NODE && !strcmp(outStr, "text()"))
+        {
+          outType = XP_TEXT;
+        }
+
+        //If the condition does not look like '@attr=value' then
+        //one or both of these simply stay empty.
+        condAtt[0] = 0;
+        condVal[0] = 0;
+        if(outType == XP_COND)
+        {
+          //Field widths keep the writes inside the buffers regardless
+          //of the length check above.
+          sscanf(outStr, "@%127[^'=']=%127s", condAtt, condVal);
+        }
+
+        f = htsmsg_create_map();
+
+        htsmsg_add_str(f, "n", outType == XP_COND ? condAtt : outStr);  //Name or condition attribute
+        htsmsg_add_s64(f, "t", outType);    //Type
+        htsmsg_add_str(f, "v", condVal);    //Condition value
+
+        snprintf(inPosStr, sizeof(inPosStr), "%d", inPos);
+        htsmsg_add_msg(m, inPosStr, f);
+
+      }
+
+      outPos = 0;
+      outStr[0] = 0;
+      outType = XP_NONE;
+    }
+
+  }//END for loop through string
+
+  return m;
+
+}
+
+/**
+ * Take a htsmsg holding an XML object model
+ * and a htsmsg holding an XPath model and
+ * try to match the XPath to a node or
+ * attribute.
+ *
+ * 'xpath' is walked item by item; each item is a map with "n" (name),
+ * "t" (type) and "v" (condition value):
+ *    t = 1  node:      descend into the child tag named "n"
+ *    t = 2  attribute: return the value of attribute "n" of the current node
+ *    t = 3  condition: attribute "n" of the current node must equal "v"
+ *    t = 4  text():    return the "cdata" of the most recently matched node
+ * Items of any other type are ignored.
+ *
+ * Returns the matched string, or NULL when nothing matched, when an
+ * argument is NULL, when an XPath item is malformed, or when the path
+ * ends without an attribute or text() item.
+ * NOTE(review): the returned pointer comes from htsmsg_get_str() on
+ * 'message', so it presumably stays valid only as long as 'message'
+ * does - confirm against the htsmsg API.
+ */
+const char *htsmsg_xml_xpath_search(htsmsg_t *message, htsmsg_t *xpath)
+{
+  //Item types, these are the values stored in the "t" field
+  enum { XP_NODE = 1, XP_ATTRIB = 2, XP_COND = 3, XP_TEXT = 4 };
+
+  htsmsg_t        *temp_msg;
+  htsmsg_t        *temp_path;
+  int64_t         temp_type;
+  htsmsg_field_t  *f;
+  htsmsg_t        *attribs;
+  htsmsg_t        *tags;
+  htsmsg_t        *pass_tags = NULL;
+  const char      *name;
+  const char      *value;
+  const char      *criteria;
+  const char      *str_saved;
+
+  if(message == NULL || xpath == NULL)
+  {
+    return NULL;
+  }
+
+  temp_msg = message;
+  str_saved = NULL;
+
+  HTSMSG_FOREACH(f, xpath) {
+
+    //Fetch and validate the item up front so that neither an
+    //uninitialised type nor a NULL name is ever used below.
+    temp_path = htsmsg_get_map(xpath, htsmsg_field_name(f));
+    temp_type = 0;
+    name = NULL;
+    if(temp_path != NULL)
+    {
+      name = htsmsg_get_str(temp_path, "n");
+    }
+    if(name == NULL || htsmsg_get_s64(temp_path, "t", &temp_type) != 0)
+    {
+      tvherror(LS_XMLTV, "Malformed XPath item '%s'", htsmsg_field_name(f));
+      return NULL;
+    }
+    criteria = htsmsg_get_str(temp_path, "v");
+
+    tvhdebug(LS_XMLTV, "htsmsg_xml_xpath_search '%s' = '%s', '%"PRId64"', '%s'", htsmsg_field_name(f), name, temp_type, criteria ? criteria : "");
+
+    if(temp_type == XP_TEXT)  //This item returns the text of the previous matched XML node
+    {
+      return str_saved;
+    }
+    else if(temp_type == XP_NODE)  //This item deals with an XML node
+    {
+      str_saved = NULL;
+
+      if((tags = htsmsg_get_map(temp_msg, "tags")) == NULL)
+      {
+        tvherror(LS_XMLTV, "Failed to find tags");
+        return NULL;
+      }
+
+      if((pass_tags = htsmsg_get_map(tags, name)) == NULL)
+      {
+        tvherror(LS_XMLTV, "Failed to match '%s'", name);
+        return NULL;
+      }
+
+      //Remember the node text in case the next item is text()
+      //and make this node the context for the following items.
+      tvhdebug(LS_XMLTV, "Matched node '%s'", name);
+      str_saved = htsmsg_get_str(pass_tags, "cdata");
+      temp_msg = pass_tags;
+    }
+    else if(temp_type == XP_ATTRIB || temp_type == XP_COND)  //These items deal with an XML attribute.
+    {
+      if((attribs = htsmsg_get_map(temp_msg, "attrib")) == NULL) return NULL;
+      if((value = htsmsg_get_str(attribs, name)) == NULL) return NULL;
+
+      if(temp_type == XP_ATTRIB)  //If this is a simple attribute, return the value.
+      {
+        tvhdebug(LS_XMLTV, "Returning attribute value '%s'", value);
+        return value;
+      }
+
+      //Otherwise this is an attribute comparison, compare it.
+      if(criteria == NULL)
+      {
+        //Report the attribute name; the criteria itself is NULL here
+        //and must not be handed to '%s'.
+        tvherror(LS_XMLTV, "NO CRITERIA '%s'", name);
+        return NULL;
+      }
+
+      tvhdebug(LS_XMLTV, "COMPARING: '%s' to '%s'", value, criteria);
+      if(strcmp(value, criteria) != 0)
+      {
+        //The condition is not satisfied, so the whole search fails.
+        return NULL;
+      }
+
+      //Continue the search to the next XPath item, but not the next node
+      tvhdebug(LS_XMLTV, "MATCHED: '%s' to '%s'", value, criteria);
+    }
+
+  }//END loop through each XPath item.
+
+  //The path ended without an attribute or text() item to return.
+  return NULL;
+
+}
index c4d4f29dced9c90b3e49327378fb71ae63df4097..b965b4653779f90b52a4c6e1c1a87a809cd11239 100644 (file)
@@ -27,5 +27,7 @@ const char *htsmsg_xml_get_cdata_str (htsmsg_t *tags, const char *tag);
 int htsmsg_xml_get_cdata_u32 (htsmsg_t *tags, const char *tag, uint32_t *u32);
 const char *htsmsg_xml_get_attr_str(htsmsg_t *tag, const char *attr);
 int htsmsg_xml_get_attr_u32(htsmsg_t *tag, const char *attr, uint32_t *u32);
+htsmsg_t *htsmsg_xml_parse_xpath(const char *xpath);
+const char *htsmsg_xml_xpath_search(htsmsg_t *tag, htsmsg_t *xpath);
 
 #endif /* HTSMSG_XML_H_ */
diff --git a/src/webui/static/img/doc/channel/grabber_xpath_fields.png b/src/webui/static/img/doc/channel/grabber_xpath_fields.png
new file mode 100755 (executable)
index 0000000..a69ca48
Binary files /dev/null and b/src/webui/static/img/doc/channel/grabber_xpath_fields.png differ