---
+## Merge-Text Search
+
+<tvh_include>inc/dvr_mergetext</tvh_include>
+
+---
+
---------------------------| --------
**All/Now** | Filter between showing all events (*All*), or those that are currently broadcasting (*Now*). Can be used with the other filters.
**Search title...** | Only display events that match the given title.
- | The filter uses case-insensitive regular expressions. If you don’t know what a regular expression is, this simply means that you can type just parts of the title and filter on that - there’s no need for full, exact matching. If the fulltext checkbox is checked, the title text is matched against title, subtitle, summary and description.
+ | The filter uses case-insensitive regular expressions. If you don’t know what a regular expression is, this simply means that you can type just parts of the title and filter on that - there’s no need for full, exact matching. If the fulltext checkbox is checked, the title text is matched against title, subtitle, summary and description. If the mergetext checkbox is checked, the title text is matched against a merger of the title + subtitle + summary + description + credits + keywords. See AutoRec help for more details on using mergetext.
**Filter channel...** | Only display events from the selected channel.
| Channels in the drop down are ordered by name and can be filtered (by name) by typing in the box.
**Filter tag...** | Only display events from channels which are included in the selected tag.
--- /dev/null
+
+# Full-Text vs Merge-Text Searching Options
+
+The 'Merge-text' search option provides enhancements to the existing 'Full-text' search option. Both options test the regular expression provided against an EPG event's Title, Sub-title (short description), Summary, Description, Credits and Keywords. If both options are selected, only the 'Merge-text' search will be performed.
+
+The 'Full-text' option will test each of the above-mentioned fields one-by-one in isolation. Alternatively, the 'Merge-text' option will test all of the above-mentioned fields as a single merged field consisting of the values of all of the fields in all of the languages available for the EPG entry in question.
+
+When merging fields, each field is prefixed with a specific code so that search terms can be applied to a specific field.
+
+## Merge-text Field Prefixes
+
+The following field prefixes are used:
+
+Prefix | Field
+--------------------------------|------------------------------
+0x01|Title
+0x02|Subtitle (Short Description)
+0x03|Summary
+0x04|Description
+0x05|Credits
+0x06|Keywords
+0x07|End
+0x09|Field separator (Tab)
+
+#### Note: In addition to the field prefixes, a field separator (Tab, 0x09) is placed before each language code and before each text value within a field.
+
+Field prefixes are always provided in the same sequence and will be present even if the field is empty.
+
+## Sample Data:
+
+[0x01][0x09]en[0x09]Event Title[0x09]fr[0x09]Titre de l'événement[0x02][0x09]en[0x09]Event Sub-Title[0x09]fr[0x09]Sous-titre de l'événement[0x03][0x04][0x05][0x06][0x07]
+
+This sample shows an EPG record having an English title of 'Event Title' and a French title of 'Titre de l'événement' as well as an English sub-title of 'Event Sub-Title' and a French sub-title of 'Sous-titre de l'événement'. No other fields contain any data.
+
+#### Note: The '[' and ']' characters are only used for illustrative purposes; they are not present in the actual data.
+
+Caution: On systems with constrained resources, use Merge-text searches sparingly, because building and searching the merged text adds extra system load and overhead.
+
+## Usage Example:
+
+"Find all EPG events whose title contains 'big bang theory' where 'leonard' is mentioned in the sub-title or summary or description, but 'sheldon' is not."
+
+``(?<=\\x01).*big bang theory.*(?=\\x02).*?(?<=\\x02)(?!.*sheldon).*leonard.*(?=\\x05)``
+
+Confining the first criterion 'big bang theory' to be between a ``\\x01`` and a ``\\x02`` restricts matches to text within the 'Title' field. Confining the second criterion to be between a ``\\x02`` and a ``\\x05`` restricts matches to text in the merged 'Sub-title', 'Summary' or 'Description' fields.
+
if (str)
eq.stitle = strdup(str);
eq.fulltext = htsmsg_get_bool_or_default(args, "fulltext", 0);
+ eq.mergetext = htsmsg_get_bool_or_default(args, "mergetext", 0);
eq.new_only = htsmsg_get_bool_or_default(args, "new", 0);
str = htsmsg_get_str(args, "channel");
if (str)
char *dae_title;
tvh_regex_t dae_title_regex;
int dae_fulltext;
+ int dae_mergetext;
uint32_t dae_content_type;
/* These categories (mainly from xmltv) such as Cooking, Dog racing, Movie.
idnode_list_mapping_t *ilm;
dvr_config_t *cfg;
double duration;
+ char *mergedtext = NULL;
+ int mergedtextResult = 0;
if (!e) return 0;
if (!e->channel) return 0;
/* Do not check title if the event is from the serieslink group */
if((dae->dae_serieslink_uri == NULL || dae->dae_serieslink_uri[0] == '\0') &&
dae->dae_title != NULL && dae->dae_title[0] != '\0') {
- lang_str_ele_t *ls;
- if (!dae->dae_fulltext) {
- if(!e->title) return 0;
- RB_FOREACH(ls, e->title, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- } else {
- ls = NULL;
- if (e->title)
+ lang_str_ele_t *ls = NULL;
+
+ //Because a mergetext search is more comprehensive than a full text
+ //search, if mergetext is enabled, it takes priority over fulltext.
+ if (!dae->dae_mergetext)
+ {
+ //Only consider doing a fulltext if we are NOT doing a mergetext search.
+ if (!dae->dae_fulltext) {
+ if(!e->title) return 0;
RB_FOREACH(ls, e->title, link)
if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->subtitle)
- RB_FOREACH(ls, e->subtitle, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->summary)
- RB_FOREACH(ls, e->summary, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->description)
- RB_FOREACH(ls, e->description, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->credits_cached)
- RB_FOREACH(ls, e->credits_cached, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
- if (!ls && e->keyword_cached)
- RB_FOREACH(ls, e->keyword_cached, link)
- if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ } else {
+ ls = NULL;
+ if (e->title)
+ RB_FOREACH(ls, e->title, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->subtitle)
+ RB_FOREACH(ls, e->subtitle, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->summary)
+ RB_FOREACH(ls, e->summary, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->description)
+ RB_FOREACH(ls, e->description, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->credits_cached)
+ RB_FOREACH(ls, e->credits_cached, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ if (!ls && e->keyword_cached)
+ RB_FOREACH(ls, e->keyword_cached, link)
+ if (!regex_match(&dae->dae_title_regex, ls->str)) break;
+ }//END fulltext block
}
+ else
+ {
+ mergedtextResult = 0;
+ mergedtext = epg_broadcast_get_merged_text(e); //'e' is the EPG record being merged.
+ if(mergedtext)
+ {
+ mergedtextResult = regex_match(&dae->dae_title_regex, mergedtext);
+ free(mergedtext);
+ if(!mergedtextResult)
+ {
+ return 1;
+ }
+ }
+ else
+ {
+ return 0; //To get here, epg_broadcast_get_merged_text() returned NULL.
+ }
+ }//END mergetext block
+
if (!ls) return 0;
}
"matched against title, subtitle, summary and description."),
.off = offsetof(dvr_autorec_entry_t, dae_fulltext),
},
+ {
+ .type = PT_BOOL,
+ .id = "mergetext",
+ .name = N_("Merge-text"),
+ .desc = N_("When 'Merge-Text' is selected, the title pattern is "
+ "matched against a merged single string consisting of the "
+ "title + subtitle + summary + description + credits + keywords "
+ "for all languages contained in the EPG entry being searched."),
+ .off = offsetof(dvr_autorec_entry_t, dae_mergetext),
+ },
{
.type = PT_STR,
.id = "channel",
return lang_str_get(b->description, lang);
}
+/**
+ * Take all of the string fields from an EPG record, in every language
+ * stored for that record, and concatenate them into a single newly
+ * allocated, NUL-terminated merged string.
+ *
+ * Used for Autorec matching and interactive EPG search.
+ *
+ * Layout (the '[' and ']' are illustrative only, they are not in the data):
+ *
+ * [0x01][0x09]<TITLE_LANG1>[0x09]<TITLE_TEXT1>[0x09]<TITLE_LANG2>[0x09]<TITLE_TEXT2>[0x02][0x09]<SHORT_DESC_LANG1>[0x09]<SHORT_DESC_TEXT1>[0x09]<SHORT_DESC_LANG2>[0x09]<SHORT_DESC_TEXT2>[0x03][0x04][0x05][0x06][0x07]
+ *
+ * 0x01 = Title
+ * 0x02 = Subtitle (Short Description)
+ * 0x03 = Summary
+ * 0x04 = Description
+ * 0x05 = Credits
+ * 0x06 = Keywords
+ * 0x07 = Terminator
+ *
+ * 0x09 = Field separator (Tab), written before every language code and
+ *        before every text value.
+ *
+ * Every field code is written, in the order above, even when the field
+ * itself is empty.
+ *
+ * @param b  EPG broadcast whose text fields are merged (may be NULL).
+ * @return   Buffer obtained from calloc() which the caller must free(),
+ *           or NULL when 'b' is NULL or the allocation fails.
+ */
+char* epg_broadcast_get_merged_text ( epg_broadcast_t *b )
+{
+
+  if (!b) return NULL;
+
+  lang_str_t *fields[] = {
+    b->title, b->subtitle, b->summary, b->description, b->credits_cached, b->keyword_cached
+  };
+  const size_t field_count = sizeof(fields) / sizeof(fields[0]);
+  //One code byte per field (even if empty) + the 0x07 terminator + the trailing NUL.
+  size_t string_size = field_count + 2;
+  size_t output_pos = 0;
+  size_t i;
+  size_t lang_len;
+  size_t str_len;
+  lang_str_ele_t *ls;
+  char *mergedtext = NULL;
+
+  //First pass: work out the length of the concatenated string.
+  for (i = 0; i < field_count; i++) {
+    if (!fields[i]) continue;
+    RB_FOREACH(ls, fields[i], link) {
+      string_size += strlen(ls->lang) + strlen(ls->str) + 2; // 2 separators
+    }
+  }
+
+  //Allocate a zero-filled buffer; the zero fill supplies the trailing NUL.
+  mergedtext = calloc(string_size, 1);
+  if (!mergedtext) {
+    tvhinfo(LS_EPG, "Unable to allocate string size '%zu' for merged text search. Skipping search.", string_size);
+    return NULL;
+  }
+
+  //Second pass: write each field code followed by its language elements.
+  for (i = 0; i < field_count; i++) {
+    mergedtext[output_pos++] = (char)(i + 1); // Field codes 0x01 to 0x06
+    if (!fields[i]) continue;
+    RB_FOREACH(ls, fields[i], link) {
+      lang_len = strlen(ls->lang);
+      str_len  = strlen(ls->str);
+
+      mergedtext[output_pos++] = 0x09;
+      memcpy(mergedtext + output_pos, ls->lang, lang_len);
+      output_pos += lang_len;
+
+      mergedtext[output_pos++] = 0x09;
+      memcpy(mergedtext + output_pos, ls->str, str_len);
+      output_pos += str_len;
+    }
+  }
+
+  mergedtext[output_pos] = 0x07; //Add a terminator
+
+  return mergedtext;
+}//END epg_broadcast_get_merged_text
+
void epg_broadcast_get_epnum ( const epg_broadcast_t *b, epg_episode_num_t *num )
{
if (!b || !num) {
{
const char *s, *lang = eq->lang;
int fulltext = eq->stitle && eq->fulltext;
+ int mergetext = eq->stitle && eq->mergetext;
+ char *mergedtext = NULL;
+ int mergedtextResult = 0;
/* Filtering */
if (e == NULL) return;
if (!e->is_new)
return;
}
- if (fulltext) {
+
+ //Search EPG text fields concatenated into one huge string.
+ if(mergetext)
+ {
+ mergedtextResult = 0;
+ mergedtext = epg_broadcast_get_merged_text(e);
+ if(mergedtext)
+ {
+ mergedtextResult = regex_match(&eq->stitle_re, mergedtext);
+ free(mergedtext);
+ if(mergedtextResult)
+ {
+ return;
+ }
+ }
+ }//END mergetext
+
+ //A mergetext search takes priority over a fulltext search.
+ if (fulltext && !mergetext) {
if ((s = epg_broadcast_get_title(e, lang)) == NULL ||
regex_match(&eq->stitle_re, s)) {
if ((s = epg_broadcast_get_subtitle(e, lang)) == NULL ||
}
}
}
- }
- if (eq->title.comp != EC_NO || (eq->stitle && !fulltext)) {
+ }//END fulltext
+
+ if (eq->title.comp != EC_NO || (eq->stitle && !(fulltext || mergetext))) {
if ((s = epg_broadcast_get_title(e, lang)) == NULL) return;
- if (eq->stitle && !fulltext && regex_match(&eq->stitle_re, s)) return;
+ if (eq->stitle && !(fulltext || mergetext) && regex_match(&eq->stitle_re, s)) return;
if (eq->title.comp != EC_NO && _eq_comp_str(&eq->title, s)) return;
}
if (eq->subtitle.comp != EC_NO) {
( epg_broadcast_t *b, const char *lang );
const ratinglabel_t *epg_broadcast_get_rating_label
( epg_broadcast_t *b );
+char* epg_broadcast_get_merged_text
+ ( epg_broadcast_t *b );
/* Episode number heplers */
// Note: this does NOT strdup the text field
char *stitle;
tvh_regex_t stitle_re;
int fulltext;
+ int mergetext;
int new_only;
char *channel;
char *channel_tag;
htsmsg_add_u32(conf, "maxduration", !retval ? u32 : 0); // 0 = any
if (!(retval = htsmsg_get_u32(in, "fulltext", &u32)) || add)
htsmsg_add_u32(conf, "fulltext", !retval ? u32 : 0); // 0 = off
+ if (!(retval = htsmsg_get_u32(in, "mergetext", &u32)) || add)
+ htsmsg_add_u32(conf, "mergetext", !retval ? u32 : 0); // 0 = off
if (!(retval = htsmsg_get_u32(in, "dupDetect", &u32)) || add)
htsmsg_add_u32(conf, "record", !retval ? u32 : DVR_AUTOREC_RECORD_ALL);
if (!(retval = htsmsg_get_u32(in, "maxCount", &u32)) || add)
if(dae->dae_title) {
htsmsg_add_str(out, "title", dae->dae_title);
htsmsg_add_u32(out, "fulltext", dae->dae_fulltext >= 1 ? 1 : 0);
+ htsmsg_add_u32(out, "mergetext", dae->dae_mergetext >= 1 ? 1 : 0);
}
htsmsg_add_str2(out, "name", dae->dae_name);
if(dae->dae_directory)
if(htsmsg_get_bool_or_default(in, "fulltext", 0))
eq.fulltext = 1;
+ if(htsmsg_get_bool_or_default(in, "mergetext", 0))
+ eq.mergetext = 1;
+
eq.stitle = strdup(query);
/* Optional */
*/
tvheadend.autorec_editor = function(panel, index) {
- var list = 'name,title,fulltext,channel,start,start_window,weekdays,' +
+ var list = 'name,title,fulltext,mergetext,channel,start,start_window,weekdays,' +
'record,tag,btype,content_type,cat1,cat2,cat3,minduration,maxduration,minyear,maxyear,minseason,maxseason,' +
'star_rating,dedup,directory,config_name,comment,pri,serieslink';
var elist = 'enabled,start_extra,stop_extra,' +
directory: { width: 200 },
title: { width: 300 },
fulltext: { width: 70 },
+ mergetext: { width: 70 },
channel: { width: 200 },
tag: { width: 200 },
btype: { width: 50 },
},
},
del: true,
- list: 'enabled,name,title,fulltext,channel,tag,start,start_window,' +
+ list: 'enabled,name,title,fulltext,mergetext,channel,tag,start,start_window,' +
'weekdays,minduration,maxduration,record,btype,content_type,cat1,cat2,cat3' +
'star_rating,pri,dedup,directory,config_name,minseason,maxseason,minyear,maxyear,owner,creator,comment,serieslink',
sort: {
width: 20
});
+ let epgFilterMergetext = new Ext.form.Checkbox({
+ width: 20
+ });
+
var epgFilterNewOnly = new Ext.form.Checkbox({
width: 20
});
epgFilterFulltext.setValue(0);
};
+ let clearMergetextFilter = function() {
+ delete epgStore.baseParams.mergetext;
+ epgFilterMergetext.setValue(0);
+ };
+
clearNewOnlyFilter = function() {
delete epgStore.baseParams.newOnly;
epgFilterNewOnly.setValue(0);
clearModeFilter();
clearTitleFilter();
clearFulltextFilter();
+ clearMergetextFilter();
clearNewOnlyFilter();
clearChannelFilter();
clearChannelTagsFilter();
}
});
+ epgFilterMergetext.on('check', function(c, value) {
+ if (epgStore.baseParams.mergetext !== value) {
+ epgStore.baseParams.mergetext = value;
+ epgView.reset();
+ }
+ });
+
epgFilterNewOnly.on('check', function(c, value) {
if (epgStore.baseParams.new !== value) {
epgStore.baseParams.new = value;
var tbar = [
epgMode, '-',
- epgFilterTitle, { text: _('Fulltext') }, epgFilterFulltext, { text: _('New only') }, epgFilterNewOnly, '-',
+ epgFilterTitle, { text: _('Fulltext') }, epgFilterFulltext, { text: _('Mergetext') }, epgFilterMergetext, { text: _('New only') }, epgFilterNewOnly, '-',
epgPrevChannel, epgFilterChannels, epgNextChannel, '-',
epgFilterChannelTags, '-',
epgFilterContentGroup, '-',
var fulltext = epgStore.baseParams.fulltext ?
" <i>(" + _("Fulltext") + ")</i>"
: "";
+ let mergetext = epgStore.baseParams.mergetext ?
+ " <i>(" + _("Mergetext") + ")</i>"
+ : "";
var newOnly = epgStore.baseParams.new ?
" <i>(" + _("New only") + ")</i>"
: "";
Ext.MessageBox.confirm(_('Auto Recorder'), _('This will create an automatic rule that '
+ 'continuously scans the EPG for programs '
+ 'to record that match this query') + ': ' + '<br><br>'
- + '<div class="x-smallhdr">' + _('Title') + ':</div>' + title + fulltext + newOnly + '<br>'
+ + '<div class="x-smallhdr">' + _('Title') + ':</div>' + title + fulltext + mergetext + newOnly + '<br>'
+ '<div class="x-smallhdr">' + _('Channel') + ':</div>' + channel + '<br>'
+ '<div class="x-smallhdr">' + _('Tag') + ':</div>' + tag + '<br>'
+ '<div class="x-smallhdr">' + _('Genre') + ':</div>' + contentType + '<br>'
conf.comment = conf.title + _(' - ') + conf.comment;
}
if (params.fulltext) conf.fulltext = params.fulltext;
+ if (params.mergetext) conf.mergetext = params.mergetext;
if (params.new) conf.btype = 3; // DVR_AUTOREC_BTYPE_NEW in dvr.h has value 3.
if (params.channel) conf.channel = params.channel;
if (params.channelTag) conf.tag = params.channelTag;