From: DeltaMikeCharlie <127641886+DeltaMikeCharlie@users.noreply.github.com>
Date: Mon, 8 Dec 2025 03:23:48 +0000 (+1100)
Subject: Add Merge-text searching option.
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb62d5a79c38d422f3a08c5b0d867800683d1829;p=thirdparty%2Ftvheadend.git
Add Merge-text searching option.
---
diff --git a/docs/class/dvrautorec.md b/docs/class/dvrautorec.md
index 555474a26..8c499e524 100644
--- a/docs/class/dvrautorec.md
+++ b/docs/class/dvrautorec.md
@@ -12,3 +12,9 @@
---
+## Merge-Text Search
+
+inc/dvr_mergetext
+
+---
+
diff --git a/docs/markdown/epg.md b/docs/markdown/epg.md
index d84a00f75..bf0b8c644 100644
--- a/docs/markdown/epg.md
+++ b/docs/markdown/epg.md
@@ -43,7 +43,7 @@ Filter | Function
---------------------------| --------
**All/Now** | Filter between showing all events (*All*), or those that are currently broadcasting (*Now*). Can be used with the other filters.
**Search title...** | Only display events that match the given title.
- | The filter uses case-insensitive regular expressions. If you don’t know what a regular expression is, this simply means that you can type just parts of the title and filter on that - there’s no need for full, exact matching. If the fulltext checkbox is checked, the title text is matched against title, subtitle, summary and description.
+ | The filter uses case-insensitive regular expressions. If you don’t know what a regular expression is, this simply means that you can type just parts of the title and filter on that - there’s no need for full, exact matching. If the fulltext checkbox is checked, the title text is matched against title, subtitle, summary and description. If the mergetext checkbox is checked, the title text is matched against a merger of the title + subtitle + summary + description + credits + keywords. See AutoRec help for more details on using mergetext.
**Filter channel...** | Only display events from the selected channel.
| Channels in the drop down are ordered by name and can be filtered (by name) by typing in the box.
**Filter tag...** | Only display events from channels which are included in the selected tag.
diff --git a/docs/markdown/inc/dvr_mergetext.md b/docs/markdown/inc/dvr_mergetext.md
new file mode 100644
index 000000000..cd5140b74
--- /dev/null
+++ b/docs/markdown/inc/dvr_mergetext.md
@@ -0,0 +1,46 @@
+
+#Full-Text vs Merge-Text Searching Options
+
+The 'Merge-text' search option provides enhancements to the existing 'Full-text' search option. Both options test the regular expression provided against an EPG event's Title, Sub-title (short description), Summary, Description, Credits and Keywords. If both options are selected, only the 'Merge-text' search will be performed.
+
+The 'Full-text' option tests each of the above-mentioned fields one by one, in isolation. Alternatively, the 'Merge-text' option tests all of the above-mentioned fields as a single merged string containing the values of every field, in every language available for the EPG entry in question.
+
+When merging fields, each field is prefixed with a specific code so that search terms can be applied to a specific field.
+
+##Merge-text Field Prefixes
+
+The following field prefixes are used:
+
+Prefix | Field
+--------------------------------|------------------------------
+0x01|Title
+0x02|Subtitle (Short Description)
+0x03|Summary
+0x04|Description
+0x05|Credits
+0x06|Keywords
+0x07|End
+0x09|Field separator (Tab)
+
+####Note: In addition to the field prefixes, a special field separator is provided between individual language elements within a field.
+
+Field prefixes are always provided in the same sequence and will be present even if the field is empty.
+
+##Sample Data:
+
+[0x01][0x09]en[0x09]Event Title[0x09]fr[0x09]Titre de l'événement[0x02][0x09]en[0x09]Event Sub-Title[0x09]fr[0x09]Sous-titre de l'événement[0x03][0x04][0x05][0x06][0x07]
+
+This sample shows an EPG record having an English title of 'Event Title' and a French title of 'Titre de l'événement' as well as an English sub-title of 'Event Sub-Title' and a French sub-title of 'Sous-titre de l'événement'. No other fields contain any data.
+
+####Note: The '[' and ']' characters are used for illustrative purposes only; they are not present in the actual data.
+
+Caution: On systems with constrained resources, Merge-text searches should be used with caution due to the extra system load and overheads required to perform the search.
+
+##Usage Example:
+
+"Find all EPG events whose title contains 'big bang theory' where 'leonard' is mentioned in the sub-title or summary or description, but 'sheldon' is not."
+
+``(?<=\\x01).*big bang theory.*(?=\\x02).*?(?<=\\x02)(?!.*sheldon).*leonard.*(?=\\x05)``
+
+Confining the first criterion 'big bang theory' to be between a ``\\x01`` and a ``\\x02`` restricts matches to text within the 'Title' field. Confining the second criterion to be between a ``\\x02`` and a ``\\x05`` restricts matches to text in the merged 'Sub-title', 'Summary' or 'Description' fields.
+
diff --git a/src/api/api_epg.c b/src/api/api_epg.c
index d82956d94..9fc3ae097 100644
--- a/src/api/api_epg.c
+++ b/src/api/api_epg.c
@@ -372,6 +372,7 @@ api_epg_grid
if (str)
eq.stitle = strdup(str);
eq.fulltext = htsmsg_get_bool_or_default(args, "fulltext", 0);
+ eq.mergetext = htsmsg_get_bool_or_default(args, "mergetext", 0);
eq.new_only = htsmsg_get_bool_or_default(args, "new", 0);
str = htsmsg_get_str(args, "channel");
if (str)
diff --git a/src/dvr/dvr.h b/src/dvr/dvr.h
index d3692ab6e..ce85a59c4 100644
--- a/src/dvr/dvr.h
+++ b/src/dvr/dvr.h
@@ -392,6 +392,7 @@ typedef struct dvr_autorec_entry {
char *dae_title;
tvh_regex_t dae_title_regex;
int dae_fulltext;
+ int dae_mergetext;
uint32_t dae_content_type;
/* These categories (mainly from xmltv) such as Cooking, Dog racing, Movie.
diff --git a/src/dvr/dvr_autorec.c b/src/dvr/dvr_autorec.c
index 9258e65ef..579ca0ee0 100644
--- a/src/dvr/dvr_autorec.c
+++ b/src/dvr/dvr_autorec.c
@@ -193,6 +193,8 @@ dvr_autorec_cmp(dvr_autorec_entry_t *dae, epg_broadcast_t *e)
idnode_list_mapping_t *ilm;
dvr_config_t *cfg;
double duration;
+ char *mergedtext = NULL;
+ int mergedtextResult = 0;
if (!e) return 0;
if (!e->channel) return 0;
@@ -350,32 +352,58 @@ dvr_autorec_cmp(dvr_autorec_entry_t *dae, epg_broadcast_t *e)
/* Do not check title if the event is from the serieslink group */
if((dae->dae_serieslink_uri == NULL || dae->dae_serieslink_uri[0] == '\0') &&
dae->dae_title != NULL && dae->dae_title[0] != '\0') {
- lang_str_ele_t *ls;
- if (!dae->dae_fulltext) {
- if(!e->title) return 0;
- RB_FOREACH(ls, e->title, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- } else {
- ls = NULL;
- if (e->title)
+ lang_str_ele_t *ls = NULL;
+
+ //Because a mergetext search is more comprehensive than a full text
+ //search, if mergetext is enabled, it takes priority over fulltext.
+ if (!dae->dae_mergetext)
+ {
+ //Only consider doing a fulltext if we are NOT doing a mergetext search.
+ if (!dae->dae_fulltext) {
+ if(!e->title) return 0;
RB_FOREACH(ls, e->title, link)
if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->subtitle)
- RB_FOREACH(ls, e->subtitle, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->summary)
- RB_FOREACH(ls, e->summary, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->description)
- RB_FOREACH(ls, e->description, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->credits_cached)
- RB_FOREACH(ls, e->credits_cached, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->keyword_cached)
- RB_FOREACH(ls, e->keyword_cached, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ } else {
+ ls = NULL;
+ if (e->title)
+ RB_FOREACH(ls, e->title, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->subtitle)
+ RB_FOREACH(ls, e->subtitle, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->summary)
+ RB_FOREACH(ls, e->summary, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->description)
+ RB_FOREACH(ls, e->description, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->credits_cached)
+ RB_FOREACH(ls, e->credits_cached, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->keyword_cached)
+ RB_FOREACH(ls, e->keyword_cached, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ }//END fulltext block
}
+ else
+ {
+ mergedtextResult = 0;
+ mergedtext = epg_broadcast_get_merged_text(e); //'e' is the EPG record being merged.
+ if(mergedtext)
+ {
+ mergedtextResult = regex_match(&dae->dae_title_regex, mergedtext);
+ free(mergedtext);
+ if(!mergedtextResult)
+ {
+ return 1;
+ }
+ }
+ else
+ {
+ return 0; //To get here, epg_broadcast_get_merged_text() returned NULL.
+ }
+ }//END mergetext block
+
if (!ls) return 0;
}
@@ -1170,6 +1198,16 @@ const idclass_t dvr_autorec_entry_class = {
"matched against title, subtitle, summary and description."),
.off = offsetof(dvr_autorec_entry_t, dae_fulltext),
},
+ {
+ .type = PT_BOOL,
+ .id = "mergetext",
+ .name = N_("Merge-text"),
+ .desc = N_("When 'Merge-Text' is selected, the title pattern is "
+ "matched against a merged single string consisting of the "
+ "title + subtitle + summary + description + credits + keywords "
+ "for all languages contained in the EPG entry being searched."),
+ .off = offsetof(dvr_autorec_entry_t, dae_mergetext),
+ },
{
.type = PT_STR,
.id = "channel",
diff --git a/src/epg.c b/src/epg.c
index 588b8753c..720c2db54 100644
--- a/src/epg.c
+++ b/src/epg.c
@@ -1603,6 +1603,78 @@ const char *epg_broadcast_get_description ( epg_broadcast_t *b, const char *lang
return lang_str_get(b->description, lang);
}
+/**
+ * Flatten every text field of an EPG broadcast into a single string
+ * so that one regular expression can be tested against all of them.
+ *
+ * Each field starts with its one-byte code; every language variant
+ * stored in a field is appended as [0x09]lang[0x09]text.
+ *
+ * 0x01 = Title
+ * 0x02 = Subtitle (Short Description)
+ * 0x03 = Summary
+ * 0x04 = Description
+ * 0x05 = Credits
+ * 0x06 = Keywords
+ * 0x07 = Terminator
+ * 0x09 = Field separator (Tab)
+ *
+ * Returns a calloc()ed, NUL-terminated buffer that the caller must
+ * free(), or NULL when b is NULL or the allocation fails.
+ */
+char* epg_broadcast_get_merged_text ( epg_broadcast_t *b )
+{
+  lang_str_t *parts[6];
+  lang_str_ele_t *ele;
+  char *merged, *wr;
+  size_t total, len;
+  int f;
+
+  if (b == NULL) return NULL;
+
+  parts[0] = b->title;
+  parts[1] = b->subtitle;
+  parts[2] = b->summary;
+  parts[3] = b->description;
+  parts[4] = b->credits_cached;
+  parts[5] = b->keyword_cached;
+
+  /* Pass 1: size the buffer. 6 field codes + terminator + NUL = 8. */
+  total = 8;
+  for (f = 0; f < 6; f++) {
+    if (parts[f] == NULL)
+      continue;
+    RB_FOREACH(ele, parts[f], link)
+      total += 2 + strlen(ele->lang) + strlen(ele->str); /* 2 tabs */
+  }
+
+  merged = calloc(total, 1);
+  if (merged == NULL) {
+    tvhinfo(LS_EPG, "Unable to allocate string size '%zu' for merged text search. Skipping search.", total);
+    return NULL;
+  }
+
+  /* Pass 2: emit the field codes 0x01..0x06 and their contents. */
+  wr = merged;
+  for (f = 0; f < 6; f++) {
+    *wr++ = f + 1;
+    if (parts[f] == NULL)
+      continue;
+    RB_FOREACH(ele, parts[f], link) {
+      *wr++ = 0x09;
+      len = strlen(ele->lang);
+      memcpy(wr, ele->lang, len);
+      wr += len;
+      *wr++ = 0x09;
+      len = strlen(ele->str);
+      memcpy(wr, ele->str, len);
+      wr += len;
+    }
+  }
+  *wr = 0x07; /* terminator; calloc() already zeroed the final NUL */
+  return merged;
+}//END epg_broadcast_get_merged_text
+
void epg_broadcast_get_epnum ( const epg_broadcast_t *b, epg_episode_num_t *num )
{
if (!b || !num) {
@@ -2254,6 +2326,9 @@ _eq_add ( epg_query_t *eq, epg_broadcast_t *e )
{
const char *s, *lang = eq->lang;
int fulltext = eq->stitle && eq->fulltext;
+ int mergetext = eq->stitle && eq->mergetext;
+ char *mergedtext = NULL;
+ int mergedtextResult = 0;
/* Filtering */
if (e == NULL) return;
@@ -2308,7 +2383,25 @@ _eq_add ( epg_query_t *eq, epg_broadcast_t *e )
if (!e->is_new)
return;
}
- if (fulltext) {
+
+ //Search EPG text fields concatenated into one huge string.
+ if(mergetext)
+ {
+ mergedtextResult = 0;
+ mergedtext = epg_broadcast_get_merged_text(e);
+ if(mergedtext)
+ {
+ mergedtextResult = regex_match(&eq->stitle_re, mergedtext);
+ free(mergedtext);
+ if(mergedtextResult)
+ {
+ return;
+ }
+ }
+ }//END mergetext
+
+ //A mergetext search takes priority over a fulltext search.
+ if (fulltext && !mergetext) {
if ((s = epg_broadcast_get_title(e, lang)) == NULL ||
regex_match(&eq->stitle_re, s)) {
if ((s = epg_broadcast_get_subtitle(e, lang)) == NULL ||
@@ -2328,10 +2421,11 @@ _eq_add ( epg_query_t *eq, epg_broadcast_t *e )
}
}
}
- }
- if (eq->title.comp != EC_NO || (eq->stitle && !fulltext)) {
+ }//END fulltext
+
+ if (eq->title.comp != EC_NO || (eq->stitle && !(fulltext || mergetext))) {
if ((s = epg_broadcast_get_title(e, lang)) == NULL) return;
- if (eq->stitle && !fulltext && regex_match(&eq->stitle_re, s)) return;
+ if (eq->stitle && !(fulltext || mergetext) && regex_match(&eq->stitle_re, s)) return;
if (eq->title.comp != EC_NO && _eq_comp_str(&eq->title, s)) return;
}
if (eq->subtitle.comp != EC_NO) {
diff --git a/src/epg.h b/src/epg.h
index 930ffd911..cce9c09d2 100644
--- a/src/epg.h
+++ b/src/epg.h
@@ -448,6 +448,8 @@ const char *epg_broadcast_get_keyword_cached
( epg_broadcast_t *b, const char *lang );
const ratinglabel_t *epg_broadcast_get_rating_label
( epg_broadcast_t *b );
+char* epg_broadcast_get_merged_text
+ ( epg_broadcast_t *b );
/* Episode number heplers */
// Note: this does NOT strdup the text field
@@ -534,6 +536,7 @@ typedef struct epg_query {
char *stitle;
tvh_regex_t stitle_re;
int fulltext;
+ int mergetext;
int new_only;
char *channel;
char *channel_tag;
diff --git a/src/htsp_server.c b/src/htsp_server.c
index 5ca20a65f..3723af217 100644
--- a/src/htsp_server.c
+++ b/src/htsp_server.c
@@ -607,6 +607,8 @@ htsp_serierec_convert(htsp_connection_t *htsp, htsmsg_t *in, channel_t *ch, int
htsmsg_add_u32(conf, "maxduration", !retval ? u32 : 0); // 0 = any
if (!(retval = htsmsg_get_u32(in, "fulltext", &u32)) || add)
htsmsg_add_u32(conf, "fulltext", !retval ? u32 : 0); // 0 = off
+ if (!(retval = htsmsg_get_u32(in, "mergetext", &u32)) || add)
+ htsmsg_add_u32(conf, "mergetext", !retval ? u32 : 0); // 0 = off
if (!(retval = htsmsg_get_u32(in, "dupDetect", &u32)) || add)
htsmsg_add_u32(conf, "record", !retval ? u32 : DVR_AUTOREC_RECORD_ALL);
if (!(retval = htsmsg_get_u32(in, "maxCount", &u32)) || add)
@@ -1247,6 +1249,7 @@ htsp_build_autorecentry(htsp_connection_t *htsp, dvr_autorec_entry_t *dae, const
if(dae->dae_title) {
htsmsg_add_str(out, "title", dae->dae_title);
htsmsg_add_u32(out, "fulltext", dae->dae_fulltext >= 1 ? 1 : 0);
+ htsmsg_add_u32(out, "mergetext", dae->dae_mergetext >= 1 ? 1 : 0);
}
htsmsg_add_str2(out, "name", dae->dae_name);
if(dae->dae_directory)
@@ -1879,6 +1882,9 @@ htsp_method_epgQuery(htsp_connection_t *htsp, htsmsg_t *in)
if(htsmsg_get_bool_or_default(in, "fulltext", 0))
eq.fulltext = 1;
+ if(htsmsg_get_bool_or_default(in, "mergetext", 0))
+ eq.mergetext = 1;
+
eq.stitle = strdup(query);
/* Optional */
diff --git a/src/webui/static/app/dvr.js b/src/webui/static/app/dvr.js
index 8c5a50815..1404603b9 100644
--- a/src/webui/static/app/dvr.js
+++ b/src/webui/static/app/dvr.js
@@ -1042,7 +1042,7 @@ tvheadend.dvr_settings = function(panel, index) {
*/
tvheadend.autorec_editor = function(panel, index) {
- var list = 'name,title,fulltext,channel,start,start_window,weekdays,' +
+ var list = 'name,title,fulltext,mergetext,channel,start,start_window,weekdays,' +
'record,tag,btype,content_type,cat1,cat2,cat3,minduration,maxduration,minyear,maxyear,minseason,maxseason,' +
'star_rating,dedup,directory,config_name,comment,pri,serieslink';
var elist = 'enabled,start_extra,stop_extra,' +
@@ -1061,6 +1061,7 @@ tvheadend.autorec_editor = function(panel, index) {
directory: { width: 200 },
title: { width: 300 },
fulltext: { width: 70 },
+ mergetext: { width: 70 },
channel: { width: 200 },
tag: { width: 200 },
btype: { width: 50 },
@@ -1109,7 +1110,7 @@ tvheadend.autorec_editor = function(panel, index) {
},
},
del: true,
- list: 'enabled,name,title,fulltext,channel,tag,start,start_window,' +
+ list: 'enabled,name,title,fulltext,mergetext,channel,tag,start,start_window,' +
'weekdays,minduration,maxduration,record,btype,content_type,cat1,cat2,cat3' +
'star_rating,pri,dedup,directory,config_name,minseason,maxseason,minyear,maxyear,owner,creator,comment,serieslink',
sort: {
diff --git a/src/webui/static/app/epg.js b/src/webui/static/app/epg.js
index 1639c872b..e786c1ebb 100644
--- a/src/webui/static/app/epg.js
+++ b/src/webui/static/app/epg.js
@@ -955,6 +955,10 @@ tvheadend.epg = function() {
width: 20
});
+ let epgFilterMergetext = new Ext.form.Checkbox({
+ width: 20
+ });
+
var epgFilterNewOnly = new Ext.form.Checkbox({
width: 20
});
@@ -1144,6 +1148,11 @@ tvheadend.epg = function() {
epgFilterFulltext.setValue(0);
};
+ let clearMergetextFilter = function() {
+ delete epgStore.baseParams.mergetext;
+ epgFilterMergetext.setValue(0);
+ };
+
clearNewOnlyFilter = function() {
delete epgStore.baseParams.newOnly;
epgFilterNewOnly.setValue(0);
@@ -1179,6 +1188,7 @@ tvheadend.epg = function() {
clearModeFilter();
clearTitleFilter();
clearFulltextFilter();
+ clearMergetextFilter();
clearNewOnlyFilter();
clearChannelFilter();
clearChannelTagsFilter();
@@ -1275,6 +1285,13 @@ tvheadend.epg = function() {
}
});
+ epgFilterMergetext.on('check', function(c, value) {
+ if (epgStore.baseParams.mergetext !== value) {
+ epgStore.baseParams.mergetext = value;
+ epgView.reset();
+ }
+ });
+
epgFilterNewOnly.on('check', function(c, value) {
if (epgStore.baseParams.new !== value) {
epgStore.baseParams.new = value;
@@ -1306,7 +1323,7 @@ tvheadend.epg = function() {
var tbar = [
epgMode, '-',
- epgFilterTitle, { text: _('Fulltext') }, epgFilterFulltext, { text: _('New only') }, epgFilterNewOnly, '-',
+ epgFilterTitle, { text: _('Fulltext') }, epgFilterFulltext, { text: _('Mergetext') }, epgFilterMergetext, { text: _('New only') }, epgFilterNewOnly, '-',
epgPrevChannel, epgFilterChannels, epgNextChannel, '-',
epgFilterChannelTags, '-',
epgFilterContentGroup, '-',
@@ -1484,6 +1501,9 @@ tvheadend.epg = function() {
var fulltext = epgStore.baseParams.fulltext ?
" (" + _("Fulltext") + ")"
: "";
+ let mergetext = epgStore.baseParams.mergetext ?
+ " (" + _("Mergetext") + ")"
+ : "";
var newOnly = epgStore.baseParams.new ?
" (" + _("New only") + ")"
: "";
@@ -1509,7 +1529,7 @@ tvheadend.epg = function() {
Ext.MessageBox.confirm(_('Auto Recorder'), _('This will create an automatic rule that '
+ 'continuously scans the EPG for programs '
+ 'to record that match this query') + ': ' + '
'
- + '' + _('Title') + ':
' + title + fulltext + newOnly + '
'
+ + '' + _('Title') + ':
' + title + fulltext + mergetext + newOnly + '
'
+ '' + _('Channel') + ':
' + channel + '
'
+ '' + _('Tag') + ':
' + tag + '
'
+ '' + _('Genre') + ':
' + contentType + '
'
@@ -1538,6 +1558,7 @@ tvheadend.epg = function() {
conf.comment = conf.title + _(' - ') + conf.comment;
}
if (params.fulltext) conf.fulltext = params.fulltext;
+ if (params.mergetext) conf.mergetext = params.mergetext;
if (params.new) conf.btype = 3; // DVR_AUTOREC_BTYPE_NEW in dvr.h has value 3.
if (params.channel) conf.channel = params.channel;
if (params.channelTag) conf.tag = params.channelTag;