*/
void regex_free(tvh_regex_t *regex)
{
+#if ENABLE_PCRE || ENABLE_PCRE2
+ if (regex->is_posix) {
+#endif
+ regfree(&regex->re_posix_code);
+ regex->re_posix_text = NULL;
+#if ENABLE_PCRE || ENABLE_PCRE2
+ } else {
#if ENABLE_PCRE
#ifdef PCRE_CONFIG_JIT
#if PCRE_STUDY_JIT_COMPILE
- if (regex->re_jit_stack) {
- pcre_jit_stack_free(regex->re_jit_stack);
- regex->re_jit_stack = NULL;
- }
+ if (regex->re_jit_stack) {
+ pcre_jit_stack_free(regex->re_jit_stack);
+ regex->re_jit_stack = NULL;
+ }
#endif
- pcre_free_study(regex->re_extra);
+ pcre_free_study(regex->re_extra);
#else
- pcre_free(regex->re_extra);
+ pcre_free(regex->re_extra);
#endif
- pcre_free(regex->re_code);
- regex->re_extra = NULL;
- regex->re_code = NULL;
- regex->re_text = NULL;
+ pcre_free(regex->re_code);
+ regex->re_extra = NULL;
+ regex->re_code = NULL;
+ regex->re_text = NULL;
#elif ENABLE_PCRE2
- pcre2_jit_stack_free(regex->re_jit_stack);
- pcre2_match_data_free(regex->re_match);
- pcre2_code_free(regex->re_code);
- pcre2_match_context_free(regex->re_mcontext);
- regex->re_match = NULL;
- regex->re_code = NULL;
- regex->re_mcontext = NULL;
- regex->re_jit_stack = NULL;
-#else
- regfree(&regex->re_code);
- regex->re_text = NULL;
+ pcre2_jit_stack_free(regex->re_jit_stack);
+ pcre2_match_data_free(regex->re_match);
+ pcre2_code_free(regex->re_code);
+ pcre2_match_context_free(regex->re_mcontext);
+ regex->re_match = NULL;
+ regex->re_code = NULL;
+ regex->re_mcontext = NULL;
+ regex->re_jit_stack = NULL;
+#endif
+ }
#endif
}
int regex_compile(tvh_regex_t *regex, const char *re_str, int flags, int subsys)
{
+#if ENABLE_PCRE || ENABLE_PCRE2
+ regex->is_posix = 0;
+ if (flags & TVHREGEX_POSIX) {
+ regex->is_posix = 1;
+#endif
+ int options = REG_EXTENDED;
+ if (flags & TVHREGEX_CASELESS)
+ options |= REG_ICASE;
+ if (!regcomp(&regex->re_posix_code, re_str, options))
+ return 0;
+ tvherror(subsys, "Unable to compile regex '%s'", re_str);
+ return -1;
+#if ENABLE_PCRE || ENABLE_PCRE2
+ } else {
#if ENABLE_PCRE
- const char *estr;
- int eoff;
- int options = PCRE_UTF8;
- if (flags & TVHREGEX_CASELESS)
- options |= PCRE_CASELESS;
+ const char *estr;
+ int eoff;
+ int options = PCRE_UTF8;
+ if (flags & TVHREGEX_CASELESS)
+ options |= PCRE_CASELESS;
#if PCRE_STUDY_JIT_COMPILE
- regex->re_jit_stack = NULL;
+ regex->re_jit_stack = NULL;
#endif
- regex->re_extra = NULL;
- regex->re_code = pcre_compile(re_str, options, &estr, &eoff, NULL);
- if (regex->re_code == NULL) {
- tvherror(subsys, "Unable to compile PCRE '%s': %s", re_str, estr);
- } else {
- regex->re_extra = pcre_study(regex->re_code,
- PCRE_STUDY_JIT_COMPILE, &estr);
- if (regex->re_extra == NULL && estr)
- tvherror(subsys, "Unable to study PCRE '%s': %s", re_str, estr);
- else {
+ regex->re_extra = NULL;
+ regex->re_code = pcre_compile(re_str, options, &estr, &eoff, NULL);
+ if (regex->re_code == NULL) {
+ tvherror(subsys, "Unable to compile PCRE '%s': %s", re_str, estr);
+ } else {
+ regex->re_extra = pcre_study(regex->re_code,
+ PCRE_STUDY_JIT_COMPILE, &estr);
+ if (regex->re_extra == NULL && estr)
+ tvherror(subsys, "Unable to study PCRE '%s': %s", re_str, estr);
+ else {
#if PCRE_STUDY_JIT_COMPILE
- regex->re_jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
- if (regex->re_jit_stack)
- pcre_assign_jit_stack(regex->re_extra, NULL, regex->re_jit_stack);
+ regex->re_jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
+ if (regex->re_jit_stack)
+ pcre_assign_jit_stack(regex->re_extra, NULL, regex->re_jit_stack);
#endif
- return 0;
+ return 0;
+ }
}
- }
- return -1;
+ return -1;
#elif ENABLE_PCRE2
- PCRE2_UCHAR8 ebuf[128];
- int ecode;
- PCRE2_SIZE eoff;
- size_t jsz;
- uint32_t options;
- assert(regex->re_jit_stack == NULL);
- regex->re_jit_stack = NULL;
- regex->re_match = NULL;
- regex->re_mcontext = pcre2_match_context_create(NULL);
- options = PCRE2_UTF;
- if (flags & TVHREGEX_CASELESS)
- options |= PCRE2_CASELESS;
- regex->re_code = pcre2_compile((PCRE2_SPTR8)re_str, -1, options,
- &ecode, &eoff, NULL);
- if (regex->re_code == NULL) {
- (void)pcre2_get_error_message(ecode, ebuf, 120);
- tvherror(subsys, "Unable to compile PCRE2 '%s': %s", re_str, ebuf);
- } else {
- regex->re_match = pcre2_match_data_create(TVHREGEX_MAX_MATCHES, NULL);
- if (re_str[0] && pcre2_jit_compile(regex->re_code, PCRE2_JIT_COMPLETE) >= 0) {
- jsz = 0;
- if (pcre2_pattern_info(regex->re_code, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0) {
- regex->re_jit_stack = pcre2_jit_stack_create(32 * 1024, 512 * 1024, NULL);
- if (regex->re_jit_stack)
- pcre2_jit_stack_assign(regex->re_mcontext, NULL, regex->re_jit_stack);
+ PCRE2_UCHAR8 ebuf[128];
+ int ecode;
+ PCRE2_SIZE eoff;
+ size_t jsz;
+ uint32_t options;
+ assert(regex->re_jit_stack == NULL);
+ regex->re_jit_stack = NULL;
+ regex->re_match = NULL;
+ regex->re_mcontext = pcre2_match_context_create(NULL);
+ options = PCRE2_UTF;
+ if (flags & TVHREGEX_CASELESS)
+ options |= PCRE2_CASELESS;
+ regex->re_code = pcre2_compile((PCRE2_SPTR8)re_str, -1, options,
+ &ecode, &eoff, NULL);
+ if (regex->re_code == NULL) {
+ (void)pcre2_get_error_message(ecode, ebuf, 120);
+ tvherror(subsys, "Unable to compile PCRE2 '%s': %s", re_str, ebuf);
+ } else {
+ regex->re_match = pcre2_match_data_create(TVHREGEX_MAX_MATCHES, NULL);
+ if (re_str[0] && pcre2_jit_compile(regex->re_code, PCRE2_JIT_COMPLETE) >= 0) {
+ jsz = 0;
+ if (pcre2_pattern_info(regex->re_code, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0) {
+ regex->re_jit_stack = pcre2_jit_stack_create(32 * 1024, 512 * 1024, NULL);
+ if (regex->re_jit_stack)
+ pcre2_jit_stack_assign(regex->re_mcontext, NULL, regex->re_jit_stack);
+ }
}
+ return 0;
}
- return 0;
+ return -1;
+#endif
}
- return -1;
-#else
- int options = REG_EXTENDED;
- if (flags & TVHREGEX_CASELESS)
- options |= REG_ICASE;
- if (!regcomp(&regex->re_code, re_str, options))
- return 0;
- tvherror(subsys, "Unable to compile regex '%s'", re_str);
- return -1;
#endif
}
int regex_match(tvh_regex_t *regex, const char *str)
{
+#if ENABLE_PCRE || ENABLE_PCRE2
+ if (regex->is_posix) {
+#endif
+ regex->re_posix_text = str;
+ return regexec(&regex->re_posix_code, str, TVHREGEX_MAX_MATCHES, regex->re_posix_match, 0);
+#if ENABLE_PCRE || ENABLE_PCRE2
+ } else {
#if ENABLE_PCRE
- regex->re_text = str;
- regex->re_matches =
- pcre_exec(regex->re_code, regex->re_extra,
- str, strlen(str), 0, 0, regex->re_match, TVHREGEX_MAX_MATCHES * 3);
- return regex->re_matches < 0;
+ regex->re_text = str;
+ regex->re_matches =
+ pcre_exec(regex->re_code, regex->re_extra,
+ str, strlen(str), 0, 0, regex->re_match, TVHREGEX_MAX_MATCHES * 3);
+ return regex->re_matches < 0;
#elif ENABLE_PCRE2
- return pcre2_match(regex->re_code, (PCRE2_SPTR8)str, -1, 0, 0,
- regex->re_match, regex->re_mcontext) <= 0;
-#else
- regex->re_text = str;
- return regexec(&regex->re_code, str, TVHREGEX_MAX_MATCHES, regex->re_match, 0);
+ return pcre2_match(regex->re_code, (PCRE2_SPTR8)str, -1, 0, 0,
+ regex->re_match, regex->re_mcontext) <= 0;
+#endif
+ }
#endif
}
assert(buf);
if (number >= TVHREGEX_MAX_MATCHES)
return -2;
+#if ENABLE_PCRE || ENABLE_PCRE2
+ if (regex->is_posix) {
+#endif
+ if (regex->re_posix_match[number].rm_so == -1)
+ return -1;
+ ssize_t size = regex->re_posix_match[number].rm_eo - regex->re_posix_match[number].rm_so;
+ if (size < 0 || size > (size_buf - 1))
+ return -1;
+ memcpy(buf, regex->re_posix_text + regex->re_posix_match[number].rm_so, size);
+ buf[size] = '\0';
+ return 0;
+#if ENABLE_PCRE || ENABLE_PCRE2
+ } else {
#if ENABLE_PCRE
- return pcre_copy_substring(regex->re_text, regex->re_match,
- (regex->re_matches == 0)
- ? TVHREGEX_MAX_MATCHES
- : regex->re_matches,
- number, buf, size_buf) < 0;
+ return pcre_copy_substring(regex->re_text, regex->re_match,
+ (regex->re_matches == 0)
+ ? TVHREGEX_MAX_MATCHES
+ : regex->re_matches,
+ number, buf, size_buf) < 0;
#elif ENABLE_PCRE2
- PCRE2_SIZE psiz = size_buf;
- return pcre2_substring_copy_bynumber(regex->re_match, number, (PCRE2_UCHAR8*)buf, &psiz);
-#else
- if (regex->re_match[number].rm_so == -1)
- return -1;
- ssize_t size = regex->re_match[number].rm_eo - regex->re_match[number].rm_so;
- if (size < 0 || size > (size_buf - 1))
- return -1;
- memcpy(buf, regex->re_text + regex->re_match[number].rm_so, size);
- buf[size] = '\0';
- return 0;
+ PCRE2_SIZE psiz = size_buf;
+ return pcre2_substring_copy_bynumber(regex->re_match, number, (PCRE2_UCHAR8*)buf, &psiz);
+#endif
+ }
#endif
}
{
if (number >= TVHREGEX_MAX_MATCHES)
return -2;
+#if ENABLE_PCRE || ENABLE_PCRE2
+ if (regex->is_posix) {
+#endif
+ if (regex->re_posix_match[number].rm_so == -1)
+ return -1;
+ return regex->re_posix_match[number].rm_eo - regex->re_posix_match[number].rm_so;
+#if ENABLE_PCRE || ENABLE_PCRE2
+ } else {
#if ENABLE_PCRE
if (number >= regex->re_matches)
return -1;
PCRE2_SIZE len;
int rc = pcre2_substring_length_bynumber(regex->re_match, number, &len);
return (!rc) ? len : -1;
-#else
- if (regex->re_match[number].rm_so == -1)
- return -1;
- return regex->re_match[number].rm_eo - regex->re_match[number].rm_so;
+#endif
+ }
#endif
}
import pprint
import json
import re
-
+import argparse
class EITScrapeTest(object):
def __init__(self):
if test.has_key('new_summary'):
self.run_test_case_i(text, subtitle_reg, test['new_summary'], "new_summary", match=2)
-
+def get_regs(parser, engine, key):
+  """Compile the regular expressions listed under `key` in a scraper config.
+
+  parser -- dict loaded from the scraper JSON file.
+  engine -- 'pcre', 'pcre2' or None; when parser[engine][key] exists, that
+            engine-specific list is used in preference to the generic one.
+  key    -- name of the pattern list, e.g. 'season_num'.
+
+  Returns a list of compiled patterns. The list is empty when the scraper
+  file does not define `key` at all, so optional sections may be omitted.
+  """
+  try:
+    patterns = parser[engine][key]
+  except KeyError:
+    # No engine-specific section (or no engine requested): fall back to the
+    # generic list. A missing key means "no patterns", not an error.
+    patterns = parser.get(key, [])
+  return [re.compile(reg) for reg in patterns]
def main(argv):
- if len(argv) < 3:
- sys.exit('Usage: %s scrapperfile scrappertestfile' % argv[0])
-
- if not os.path.exists(argv[1]):
- sys.exit('ERROR: scrapperfile "%s" was not found!' % argv[1])
- if not os.path.exists(sys.argv[2]):
- sys.exit('ERROR: scrappertestfile "%s" was not found!' % argv[2])
-
- print "Opening Parser file " + argv[1]
- fp = open(argv[1], 'r')
- parser = json.load(fp)
+ parser = argparse.ArgumentParser(description='Test scraper regular expressions')
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument('--pcre', dest='engine',
+ action='store_const', const='pcre',
+ help='test PCRE regular expressions if available')
+ group.add_argument('--pcre2', dest='engine',
+ action='store_const', const='pcre2',
+ help='test PCRE2 regular expressions if available')
+ parser.add_argument('scraperfile', type=argparse.FileType('r'))
+ parser.add_argument('scrapertestfile', type=argparse.FileType('r'))
+ args = parser.parse_args()
+
+ print(args.engine)
+ parser = json.load(args.scraperfile)
pprint.pprint(parser, indent=2)
# Compile the regular expressions that we will use.
- sn_reg = []
- if parser.has_key('season_num'):
- sn = parser['season_num']
- for reg in sn: sn_reg.append(re.compile(reg))
-
- en_reg = []
- if parser.has_key('episode_num'):
- en = parser['episode_num']
- for reg in en: en_reg.append(re.compile(reg))
-
- airdate_reg = []
- if parser.has_key('airdate'):
- airdate = parser['airdate']
- for reg in airdate: airdate_reg.append(re.compile(reg))
-
- subtitle_reg = []
- if parser.has_key('scrape_subtitle'):
- subtitle = parser['scrape_subtitle']
- for reg in subtitle:
- subtitle_reg.append(re.compile(reg))
+ sn_reg = get_regs(parser, args.engine, 'season_num')
+ en_reg = get_regs(parser, args.engine, 'episode_num')
+ airdate_reg = get_regs(parser, args.engine, 'airdate')
+ subtitle_reg = get_regs(parser, args.engine, 'scrape_subtitle')
# Now parse the test file which is a JSON input file
- print "Opening test input file " + argv[2]
- fp = open(argv[2], 'r')
- tests = json.load(fp)
+ tests = json.load(args.scrapertestfile)
# And run the tests
tester = EITScrapeTest()