From: E.Smith <31170571+azlm8t@users.noreply.github.com> Date: Thu, 7 Sep 2017 14:26:06 +0000 (+0100) Subject: dvr: Add scraper-friendly format string. (#4667) X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8d51f5aa96ea83f033972278758a6a495015b82e;p=thirdparty%2Ftvheadend.git dvr: Add scraper-friendly format string. (#4667) We add a few new format strings. This helps split movies and tvshows into separate folders to make it easier for external programs to scrape. $q and $Q determine if the programme is a movie or a show from the guide data, with variants to force detection as a movie or a show. We then create "queryable" or scrapable names such as: tvmovies/Gladiator (2000) tvshows/Bonanza/Bonanza - S09E18 - The Burning Sky This simplifies the recording format string for people with good guide data since it easily splits the programmes without need of post-processing scripts. We don't split sports, news, etc. into separate directories purely because it is difficult to identify programmes that are purely those categories. For example a popular nightly politics series is classified as "News", whereas most people think of news as being purely news headlines programmes. We also add variants of $1q and $2q to force the programme to be considered a movie or show ignoring the guide data. The names are chosen to make it easier to add other variants in the future if necessary ($3q, $4q, etc). 
Issue: #4667 --- diff --git a/docs/property/pathname.md b/docs/property/pathname.md index 221fd6b13..e25a550f7 100644 --- a/docs/property/pathname.md +++ b/docs/property/pathname.md @@ -11,11 +11,14 @@ Format | Description | Example `$e` | Event episode name | S02-E06 `$c` | Channel name | SkySport `$g` | Content type | Movie : Science fiction +`$Q` | Scraper friendly (see below) | Gladiator (2000) + 〃 | 〃 | Bones/Bones - S02E06 +`$q` | Scraper friendly with directories (see below) | tvshows/Bones/Bones - S02E06 + 〃 | 〃 | tvmovies/Gladiator (2000) `$n` | Unique number added when the file already exists | -1 `$x` | Filename extension (from the active stream muxer | mkv `%F` | ISO 8601 date format | 2011-03-19 `%R` | The time in 24-hour notation | 14:12 - The format strings `$t`,`$s`,`%e`,`$c` also have delimiter variants such as `$ t` (space after the dollar character), `$-t`, `$_t`, `$.t`, `$,t`, `$;t`. In these cases, the delimiter is applied @@ -24,3 +27,47 @@ only when the substituted string is not empty. For $t and $s format strings, you may also limit the number of output characters using $99-t format string where 99 means the limit. As you can see, the delimiter can be also applied. + +The format strings `$q` and `$Q` generate filenames that are suitable +for many external scrapers. They rely on schedule data that correctly +identifies episodes and genres. If your guide data incorrectly +identifies movies as shows then the filenames will be incorrect and +shows could be identified as movies or vice versa. Any xmltv guide data +should contain the category "movie" for movies. + +The `$q` format will create sub-directories `tvmovies` and `tvshows` +based on the genre in the guide data. For tvshows, a second-level +directory based on the title of the show is created. 
+ +Examples are: +- tvmovies/Gladiator (2000) +- tvshows/Countdown/Countdown +- tvshows/Bones/Bones - S05E11 +- tvshows/Bones/Bones - S05E11 - The X in the Files + +The `$Q` format is similar to `$q` but does not use genre sub-directories. +Sub-directories are still created for tvshow episodes. +Examples below are based on different information in the EPG: +- Gladiator (2000) (movie) +- Bones/Bones - S05E11 (episode with guide season/episode information) +- Countdown/Countdown (episode without guide season/episode information) + +The `$Q` and `$q` formats also have two numeric modifiers to select +variant formats and can be used as `$1Q`, `$2Q`, `$1q`, and `$2q`. + +The number 1 variant forces the recording to be formatted as a movie, +ignoring the genre from the schedule. + +The number 2 variant forces the recording to be formatted as a +tv series. + +These variants can be useful to work around bad schedule data that gives +incorrect genres for programmes. + +Typically the `$q` and `$Q` formats would be combined with other +modifiers to generate a complete filename such as `$q$n.$x`. + +Even with correct guide information, external scrapers can retrieve +incorrect results. A famous example is the detective tv series +"Castle", which is often incorrectly retrieved as a much earlier tv show +about castles. 
diff --git a/src/dvr/dvr_rec.c b/src/dvr/dvr_rec.c index 751d182fb..c09859125 100644 --- a/src/dvr/dvr_rec.c +++ b/src/dvr/dvr_rec.c @@ -37,6 +37,7 @@ #include "atomic.h" #include "intlconv.h" #include "notify.h" +#include "string_list.h" #include "muxer.h" @@ -355,6 +356,179 @@ dvr_sub_episode(const char *id, const char *fmt, const void *aux, char *tmp, siz return dvr_do_prefix(id, fmt, buf, tmp, tmplen); } +static const char * +_dvr_sub_scraper_friendly(const char *id, const char *fmt, const void *aux, char *tmp, size_t tmplen, int with_genre_subdir) +{ + char date_buf[MAX(PATH_MAX, 512)] = { 0 }; + char episode_buf[MAX(PATH_MAX, 512)] = { 0 }; + const dvr_entry_t *de = aux; + /* Can't be const due to call to epg_episode_number_format */ + /*const*/ epg_episode_t *episode = de->de_bcast ? de->de_bcast->episode : 0; + + *tmp = 0; + const char *title = lang_str_get(de->de_title, NULL); + const char *subtitle = lang_str_get(de->de_subtitle, NULL); + const char *desc = lang_str_get(de->de_desc, NULL); + + if (subtitle && desc && strcmp(subtitle, desc) == 0) { + /* Subtitle and description are identical so assume they are from + * bad OTA EIT. Some OTA EIT often has a (long) summary which is + * put in to both subtitle and description. So we really don't + * want this to be used as the subtitle field. + */ + subtitle = desc = NULL; + } + + char title_buf[MAX(PATH_MAX, 512)] = { 0 }; + char subtitle_buf[MAX(PATH_MAX, 512)] = { 0 }; + /* Copy a cleaned version in to our buffers. + * Since dvr_clean_directory_separator _can_ modify source if source!=dest + * it means we have to remove our const when we call it. + */ + if (title) + dvr_clean_directory_separator((char*)title, title_buf, sizeof title_buf); + if (subtitle) + dvr_clean_directory_separator((char*)subtitle, subtitle_buf, sizeof subtitle_buf); + + int is_movie = 0; + /* Override options on the format tag. This is useful because my OTA + * for the film channel doesn't have a genre. 
+ */ + if (fmt && *fmt == '1') /* Force to be a movie */ + is_movie = 1; + else if (fmt && *fmt == '2') /* Force to be a series (not a movie) */ + is_movie = 0; + else { + if (de->de_bcast && de->de_bcast->category) { + /* We've parsed categories from xmltv. So check if it has the movie category. */ + is_movie = + string_list_contains_string(de->de_bcast->category, "Movie") || + string_list_contains_string(de->de_bcast->category, "movie") || + string_list_contains_string(de->de_bcast->category, "Film") || + string_list_contains_string(de->de_bcast->category, "film"); + } else { + /* No xmltv categories parsed. So have to use less-accurate genre instead. */ + + /* Magic number from epg.c / EN 300 468 for movie/drama category from OTA */ + is_movie = (de->de_content_type == 1); + if (is_movie) { + /* If here, it is a movie or a drama (not sports, etc). But + * OTA doesn't differentiate movie and episode, so if it has a + * series/episode number then assume must be an episode, + * otherwise we default to movie. + */ + if (episode && (episode->epnum.s_num || episode->epnum.e_num)) + is_movie = 0; + } + } + } + + tvhdebug(LS_DVR, "fmt = %s is_movie = %d content_type = %d", fmt ?: "", is_movie, de->de_content_type); + + if (is_movie) { + /* Include the year if available. This helps scraper differentiate + * between numerous remakes of the same film. + */ + if (episode) { + if (episode->copyright_year) { + sprintf(date_buf, "%04d", episode->copyright_year); + } else { + /* Some providers use first_aired as really the copyright date. 
*/ + const time_t first_aired = episode->first_aired; + if (first_aired) { + /* Get just the year part */ + struct tm tm; + if (localtime_r(&first_aired, &tm)) { + sprintf(date_buf, "%04d", tm.tm_year + 1900); + } + } + } + } + } else { + /* Not a movie */ + if (episode) { + /* Get episode information */ + epg_episode_number_format(episode, + episode_buf, sizeof(episode_buf), + NULL, "S%02d", NULL, "E%02d", NULL); + + const time_t first_aired = episode->first_aired; + if (first_aired) { + /* Get as yyyy-mm-dd since programme could be one episode a day/week, + * unlike films which only needs the year. + */ + struct tm tm; + if (localtime_r(&first_aired, &tm)) { + strftime(date_buf, sizeof date_buf, "%F", &tm); + } + } + } + } + + /* Now we have all our data in place so combine it. + * This is based on examples in: + * http://kodi.wiki/view/Naming_video_files/Movies + * http://kodi.wiki/view/TV_Shows_(Video_Library) + */ + + size_t offset = 0; + + if (is_movie) { + /* TV movies are probably best saved in one folder rather than + * multiple folders since video players such as Kodi can download + * artwork and information for them anyway and it makes deleting + * and moving them easier since they get tracked by inotify on + * just the one directory. + * + * Example format below: + * "tvmovies/title (yyyy)" (with genre_subdir) + * "title (yyyy)" (without genre_subdir) + * "title" (without genre_subdir, no airdate) + */ + if (with_genre_subdir) tvh_strlcatf(tmp, tmplen, offset, "tvmovies/"); + if (*title_buf) tvh_strlcatf(tmp, tmplen, offset, "%s", title_buf); + /* Movies don't have anything relevant in sub-titles field so + * anything there should be ignored. I think some channels store a + * translated movie name there (title=original movie name, + * subtitle=local language name for movie), but only use title + * since scrapers only handle one title. 
+ */ + // if (*subtitle_buf) tvh_strlcatf(tmp, tmplen, offset, " - %s", subtitle_buf); + if (*date_buf) tvh_strlcatf(tmp, tmplen, offset, " (%s)", date_buf); + } else { + /* TV shows have to go in separate directories based on their title in + * order to be scraped properly. + * We put the episode number before the subtitle to make it easier + * to see if we are missing episodes when you do ls. + * + * Example formats below: + * "tvshows/title/title - S01E02 - subtitle" (with genre_subdir) + * "title - S01E02 - subtitle" (without genre_subdir) + * "title - subtitle_2001-05-04" (without genre_subdir, long running show) + * "title - subtitle" (without genre_subdir, no epg info on show) + */ + if (with_genre_subdir) tvh_strlcatf(tmp, tmplen, offset, "tvshows/"); + if (*title_buf) tvh_strlcatf(tmp, tmplen, offset, "%s/%s", title_buf, title_buf); + if (*episode_buf) tvh_strlcatf(tmp, tmplen, offset, " - %s", episode_buf); + if (*subtitle_buf) tvh_strlcatf(tmp, tmplen, offset, " - %s", subtitle_buf); + /* Only include date if we don't have an explicit episode number. 
*/ + if (!*episode_buf && *date_buf) tvh_strlcatf(tmp, tmplen, offset, "_%s", date_buf); + } + return tmp; +} + +static const char * +dvr_sub_scraper_friendly_with_genre_subdir(const char *id, const char *fmt, const void *aux, char *tmp, size_t tmplen) +{ + return _dvr_sub_scraper_friendly(id, fmt, aux, tmp, tmplen, 1); +} + +static const char * +dvr_sub_scraper_friendly_without_genre_subdir(const char *id, const char *fmt, const void *aux, char *tmp, size_t tmplen) +{ + return _dvr_sub_scraper_friendly(id, fmt, aux, tmp, tmplen, 0); +} + static const char * dvr_sub_channel(const char *id, const char *fmt, const void *aux, char *tmp, size_t tmplen) { @@ -469,6 +643,12 @@ static htsstr_substitute_t dvr_subs_entry[] = { { .id = ".g", .getval = dvr_sub_genre }, { .id = ",g", .getval = dvr_sub_genre }, { .id = ";g", .getval = dvr_sub_genre }, + { .id = "q", .getval = dvr_sub_scraper_friendly_with_genre_subdir }, + { .id = "1q", .getval = dvr_sub_scraper_friendly_with_genre_subdir }, + { .id = "2q", .getval = dvr_sub_scraper_friendly_with_genre_subdir }, + { .id = "Q", .getval = dvr_sub_scraper_friendly_without_genre_subdir }, + { .id = "1Q", .getval = dvr_sub_scraper_friendly_without_genre_subdir }, + { .id = "2Q", .getval = dvr_sub_scraper_friendly_without_genre_subdir }, { .id = NULL, .getval = NULL } };