1 Submitted By: Ken Moffat <ken at linuxfromscratch dot org>
3 Initial Package Version: 8.38
4 Upstream Status: Applied
5 Origin: Upstream, backported to 8.38 by Petr Písař at redhat
6 Description: Various fixes, including for CVE-2016-1263 and many other
7 bugs which have been fixed upstream. Many of these bugs were found by
8 fuzzing. Upstream is trying to persuade its users to move to pcre2 and is
9 giving low priority to further pcre1 maintenance releases.
11 From 3c80e02cd464ea049e117b423fd48fab294c51a9 Mon Sep 17 00:00:00 2001
12 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
13 Date: Thu, 26 Nov 2015 20:29:13 +0000
14 Subject: [PATCH] Fix auto-callout (?# comment bug.
16 Content-Type: text/plain; charset=UTF-8
17 Content-Transfer-Encoding: 8bit
19 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1611 2f5784b3-3f2a-0410-8824-cb99058d5e15
21 Petr Pisar: Ported to 8.38.
23 diff --git a/pcre_compile.c b/pcre_compile.c
24 index 4d3b313..3360a8b 100644
27 @@ -4699,6 +4699,23 @@ for (;; ptr++)
31 + /* Skip over (?# comments. We need to do this here because we want to know if
32 + the next thing is a quantifier, and these comments may come between an item
33 + and its quantifier. */
35 + if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
36 + ptr[2] == CHAR_NUMBER_SIGN)
39 + while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
40 + if (*ptr == CHAR_NULL)
42 + *errorcodeptr = ERR18;
48 /* See if the next thing is a quantifier. */
51 @@ -6529,21 +6546,6 @@ for (;; ptr++)
52 case CHAR_LEFT_PARENTHESIS:
55 - /* First deal with comments. Putting this code right at the start ensures
56 - that comments have no bad side effects. */
58 - if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
61 - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
62 - if (*ptr == CHAR_NULL)
64 - *errorcodeptr = ERR18;
70 /* Now deal with various "verbs" that can be introduced by '*'. */
72 if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
73 diff --git a/testdata/testinput2 b/testdata/testinput2
74 index e2e520f..92e3359 100644
75 --- a/testdata/testinput2
76 +++ b/testdata/testinput2
77 @@ -4217,4 +4217,12 @@ backtracking verbs. --/
89 /-- End of testinput2 --/
90 diff --git a/testdata/testinput7 b/testdata/testinput7
91 index e411a4b..00b9738 100644
92 --- a/testdata/testinput7
93 +++ b/testdata/testinput7
94 @@ -853,4 +853,8 @@ of case for anything other than the ASCII letters. --/
98 +/L(?#(|++<!(2)?/B8COZ
100 +/L(?#(|++<!(2)?/B8WCZ
102 /-- End of testinput7 --/
103 diff --git a/testdata/testoutput2 b/testdata/testoutput2
104 index 85c565d..2cf7a90 100644
105 --- a/testdata/testoutput2
106 +++ b/testdata/testoutput2
107 @@ -14574,4 +14574,40 @@ No match
109 ------------------------------------------------------------------
112 +------------------------------------------------------------------
117 +------------------------------------------------------------------
120 +------------------------------------------------------------------
125 +------------------------------------------------------------------
128 +------------------------------------------------------------------
135 +------------------------------------------------------------------
137 +/L(?#(|++<!(2)?/BCOZ
138 +------------------------------------------------------------------
145 +------------------------------------------------------------------
147 /-- End of testinput2 --/
148 diff --git a/testdata/testoutput7 b/testdata/testoutput7
149 index cc9ebdd..fdfff64 100644
150 --- a/testdata/testoutput7
151 +++ b/testdata/testoutput7
152 @@ -2348,4 +2348,24 @@ No match
154 ------------------------------------------------------------------
156 +/L(?#(|++<!(2)?/B8COZ
157 +------------------------------------------------------------------
164 +------------------------------------------------------------------
166 +/L(?#(|++<!(2)?/B8WCZ
167 +------------------------------------------------------------------
174 +------------------------------------------------------------------
176 /-- End of testinput7 --/
180 From ef6b10fcde41a2687f38d4a9ff2886b037948a1b Mon Sep 17 00:00:00 2001
181 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
182 Date: Fri, 27 Nov 2015 17:13:13 +0000
183 Subject: [PATCH 1/5] Fix negated POSIX class within negated overall class UCP
186 Content-Type: text/plain; charset=UTF-8
187 Content-Transfer-Encoding: 8bit
189 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1612 2f5784b3-3f2a-0410-8824-cb99058d5e15
191 Petr Písař: Ported to 8.38.
192 diff --git a/pcre_compile.c b/pcre_compile.c
193 index 3360a8b..3670f1e 100644
196 @@ -5063,20 +5063,22 @@ for (;; ptr++)
200 - /* For the other POSIX classes (ascii, xdigit) we are going to fall
201 - through to the non-UCP case and build a bit map for characters with
202 - code points less than 256. If we are in a negated POSIX class
203 - within a non-negated overall class, characters with code points
204 - greater than 255 must all match. In the special case where we have
205 - not yet generated any xclass data, and this is the final item in
206 - the overall class, we need do nothing: later on, the opcode
207 + /* For the other POSIX classes (ascii, cntrl, xdigit) we are going
208 + to fall through to the non-UCP case and build a bit map for
209 + characters with code points less than 256. If we are in a negated
210 + POSIX class, characters with code points greater than 255 must
211 + either all match or all not match. In the special case where we
212 + have not yet generated any xclass data, and this is the final item
213 + in the overall class, we need do nothing: later on, the opcode
214 OP_NCLASS will be used to indicate that characters greater than 255
215 are acceptable. If we have already seen an xclass item or one may
216 follow (we have to assume that it might if this is not the end of
217 - the class), explicitly match all wide codepoints. */
218 + the class), explicitly list all wide codepoints, which will then
219 + either not match or match, depending on whether the class is or is
223 - if (!negate_class && local_negate &&
224 + if (local_negate &&
225 (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
227 *class_uchardata++ = XCL_RANGE;
228 diff --git a/testdata/testinput6 b/testdata/testinput6
229 index aeb62a0..a178d3d 100644
230 --- a/testdata/testinput6
231 +++ b/testdata/testinput6
232 @@ -1553,4 +1553,13 @@
245 /-- End of testinput6 --/
246 diff --git a/testdata/testoutput6 b/testdata/testoutput6
247 index beb85aa..b64dc0d 100644
248 --- a/testdata/testoutput6
249 +++ b/testdata/testoutput6
250 @@ -2557,4 +2557,20 @@ No match
270 /-- End of testinput6 --/
274 From bfc1dfa660c24dc7a75108d934290e50d7db2719 Mon Sep 17 00:00:00 2001
275 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
276 Date: Fri, 27 Nov 2015 17:41:04 +0000
277 Subject: [PATCH 2/5] Fix bug for isolated \E between an item and its qualifier
278 when auto callout is set.
280 Content-Type: text/plain; charset=UTF-8
281 Content-Transfer-Encoding: 8bit
283 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1613 2f5784b3-3f2a-0410-8824-cb99058d5e15
285 Petr Písař: Ported to 8.38.
287 diff --git a/pcre_compile.c b/pcre_compile.c
288 index 3670f1e..5786cd3 100644
291 @@ -4645,9 +4645,10 @@ for (;; ptr++)
295 - /* If in \Q...\E, check for the end; if not, we have a literal */
296 + /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an
297 + isolated \E is ignored. */
299 - if (inescq && c != CHAR_NULL)
300 + if (c != CHAR_NULL)
302 if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
304 @@ -4655,7 +4656,7 @@ for (;; ptr++)
311 if (previous_callout != NULL)
313 @@ -4670,7 +4671,6 @@ for (;; ptr++)
317 - /* Control does not reach here. */
320 /* In extended mode, skip white space and comments. We need a loop in order
321 diff --git a/testdata/testinput2 b/testdata/testinput2
322 index 92e3359..e8ca4fe 100644
323 --- a/testdata/testinput2
324 +++ b/testdata/testinput2
325 @@ -4225,4 +4225,6 @@ backtracking verbs. --/
331 /-- End of testinput2 --/
332 diff --git a/testdata/testoutput2 b/testdata/testoutput2
333 index 2cf7a90..09756b8 100644
334 --- a/testdata/testoutput2
335 +++ b/testdata/testoutput2
336 @@ -14610,4 +14610,18 @@ No match
338 ------------------------------------------------------------------
341 +------------------------------------------------------------------
352 +------------------------------------------------------------------
354 /-- End of testinput2 --/
358 From 108377b836fc29a84f5286287629d96549b1c777 Mon Sep 17 00:00:00 2001
359 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
360 Date: Sun, 29 Nov 2015 17:38:25 +0000
361 Subject: [PATCH 3/5] Give error for regexec with pmatch=NULL and REG_STARTEND
364 Content-Type: text/plain; charset=UTF-8
365 Content-Transfer-Encoding: 8bit
367 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1614 2f5784b3-3f2a-0410-8824-cb99058d5e15
369 Petr Písař: Ported to 8.38.
371 diff --git a/pcreposix.c b/pcreposix.c
372 index f024423..dcc13ef 100644
375 @@ -364,6 +364,7 @@ start location rather than being passed as a PCRE "starting offset". */
377 if ((eflags & REG_STARTEND) != 0)
379 + if (pmatch == NULL) return REG_INVARG;
380 so = pmatch[0].rm_so;
381 eo = pmatch[0].rm_eo;
386 From e347b40d5bb12f7ef1e632aa649571a107be7d8a Mon Sep 17 00:00:00 2001
387 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
388 Date: Sun, 29 Nov 2015 17:46:23 +0000
389 Subject: [PATCH 4/5] Allow for up to 32-bit numbers in the ordin() function in
392 Content-Type: text/plain; charset=UTF-8
393 Content-Transfer-Encoding: 8bit
395 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1615 2f5784b3-3f2a-0410-8824-cb99058d5e15
397 Petr Písař: Ported to 8.38.
399 diff --git a/pcregrep.c b/pcregrep.c
400 index 64986b0..cd53c64 100644
403 @@ -2437,7 +2437,7 @@ return options;
407 -static char buffer[8];
408 +static char buffer[14];
415 From e78ad4264b16988b826bd2939a1781c1165a92d9 Mon Sep 17 00:00:00 2001
416 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
417 Date: Mon, 30 Nov 2015 17:44:45 +0000
418 Subject: [PATCH 5/5] Fix \Q\E before qualifier bug when auto callouts are
421 Content-Type: text/plain; charset=UTF-8
422 Content-Transfer-Encoding: 8bit
424 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1616 2f5784b3-3f2a-0410-8824-cb99058d5e15
426 Petr Písař: Ported to 8.38.
428 diff --git a/pcre_compile.c b/pcre_compile.c
429 index 5786cd3..beed46b 100644
432 @@ -4671,17 +4671,27 @@ for (;; ptr++)
437 + /* Check for the start of a \Q...\E sequence. We must do this here rather
438 + than later in case it is immediately followed by \E, which turns it into a
439 + "do nothing" sequence. */
441 + if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
449 - /* In extended mode, skip white space and comments. We need a loop in order
450 - to check for more white space and more comments after a comment. */
451 + /* In extended mode, skip white space and comments. */
453 if ((options & PCRE_EXTENDED) != 0)
456 + const pcre_uchar *wscptr = ptr;
457 + while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
458 + if (c == CHAR_NUMBER_SIGN)
460 - while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
461 - if (c != CHAR_NUMBER_SIGN) break;
463 while (*ptr != CHAR_NULL)
465 @@ -4695,7 +4705,15 @@ for (;; ptr++)
466 if (utf) FORWARDCHAR(ptr);
469 - c = *ptr; /* Either NULL or the char after a newline */
472 + /* If we skipped any characters, restart the loop. Otherwise, we didn't see
482 @@ -7900,16 +7918,6 @@ for (;; ptr++)
486 - if (escape == ESC_Q) /* Handle start of quoted string */
488 - if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
489 - ptr += 2; /* avoid empty string */
490 - else inescq = TRUE;
494 - if (escape == ESC_E) continue; /* Perl ignores an orphan \E */
496 /* For metasequences that actually match a character, we disable the
497 setting of a first character if it hasn't already been set. */
499 diff --git a/testdata/testinput2 b/testdata/testinput2
500 index e8ca4fe..3a1134f 100644
501 --- a/testdata/testinput2
502 +++ b/testdata/testinput2
503 @@ -4227,4 +4227,6 @@ backtracking verbs. --/
509 /-- End of testinput2 --/
510 diff --git a/testdata/testoutput2 b/testdata/testoutput2
511 index 09756b8..ac33cc4 100644
512 --- a/testdata/testoutput2
513 +++ b/testdata/testoutput2
514 @@ -14624,4 +14624,19 @@ No match
516 ------------------------------------------------------------------
519 +------------------------------------------------------------------
531 +------------------------------------------------------------------
533 /-- End of testinput2 --/
537 From 46ed1a703b067e5b679eacf6500a54dae35f8130 Mon Sep 17 00:00:00 2001
538 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
539 Date: Thu, 3 Dec 2015 17:05:40 +0000
540 Subject: [PATCH] Fix /x bug when pattern starts with white space and (?-x)
542 Content-Type: text/plain; charset=UTF-8
543 Content-Transfer-Encoding: 8bit
545 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1617 2f5784b3-3f2a-0410-8824-cb99058d5e15
547 Petr Písař: Ported to 8.38.
549 diff --git a/pcre_compile.c b/pcre_compile.c
550 index beed46b..57719b9 100644
553 @@ -7607,39 +7607,15 @@ for (;; ptr++)
554 newoptions = (options | set) & (~unset);
556 /* If the options ended with ')' this is not the start of a nested
557 - group with option changes, so the options change at this level. If this
558 - item is right at the start of the pattern, the options can be
559 - abstracted and made external in the pre-compile phase, and ignored in
560 - the compile phase. This can be helpful when matching -- for instance in
561 - caseless checking of required bytes.
563 - If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
564 - definitely *not* at the start of the pattern because something has been
565 - compiled. In the pre-compile phase, however, the code pointer can have
566 - that value after the start, because it gets reset as code is discarded
567 - during the pre-compile. However, this can happen only at top level - if
568 - we are within parentheses, the starting BRA will still be present. At
569 - any parenthesis level, the length value can be used to test if anything
570 - has been compiled at that level. Thus, a test for both these conditions
571 - is necessary to ensure we correctly detect the start of the pattern in
574 + group with option changes, so the options change at this level.
575 If we are not at the pattern start, reset the greedy defaults and the
576 case value for firstchar and reqchar. */
578 if (*ptr == CHAR_RIGHT_PARENTHESIS)
580 - if (code == cd->start_code + 1 + LINK_SIZE &&
581 - (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
583 - cd->external_options = newoptions;
587 - greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
588 - greedy_non_default = greedy_default ^ 1;
589 - req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
591 + greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
592 + greedy_non_default = greedy_default ^ 1;
593 + req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
595 /* Change options at this level, and pass them back for use
596 in subsequent branches. */
597 diff --git a/testdata/testoutput2 b/testdata/testoutput2
598 index ac33cc4..6c42897 100644
599 --- a/testdata/testoutput2
600 +++ b/testdata/testoutput2
601 @@ -419,7 +419,7 @@ Need char = '>'
604 Capturing subpattern count = 0
610 @@ -443,7 +443,7 @@ Need char = '='
613 Capturing subpattern count = 0
619 @@ -477,7 +477,7 @@ Failed: lookbehind assertion is not fixed length at offset 12
622 Capturing subpattern count = 0
625 First char = 'a' (caseless)
626 Need char = 'c' (caseless)
628 @@ -489,7 +489,7 @@ No need char
631 Capturing subpattern count = 0
632 -Options: anchored caseless
637 @@ -502,7 +502,7 @@ No need char
639 Capturing subpattern count = 0
640 May match empty string
641 -Options: anchored dotall
646 @@ -516,7 +516,7 @@ Starting chars: a b c d
649 Capturing subpattern count = 0
654 Subject length lower bound = 1
655 @@ -524,7 +524,7 @@ Starting chars: A B C D a b c d
658 Capturing subpattern count = 1
663 Subject length lower bound = 1
664 @@ -538,7 +538,7 @@ No need char
667 Capturing subpattern count = 1
668 -Options: caseless multiline
670 First char at start or follows newline
673 @@ -1179,7 +1179,7 @@ No need char
675 ------------------------------------------------------------------
676 Capturing subpattern count = 1
677 -Options: anchored dotall
682 @@ -2735,7 +2735,7 @@ No match
684 ------------------------------------------------------------------
685 Capturing subpattern count = 0
686 -Options: caseless extended
688 First char = 'a' (caseless)
689 Need char = 'c' (caseless)
691 @@ -2748,7 +2748,7 @@ Need char = 'c' (caseless)
693 ------------------------------------------------------------------
694 Capturing subpattern count = 0
695 -Options: caseless extended
697 First char = 'a' (caseless)
698 Need char = 'c' (caseless)
700 @@ -3095,7 +3095,7 @@ Need char = 'b'
702 ------------------------------------------------------------------
703 Capturing subpattern count = 0
709 @@ -3497,7 +3497,7 @@ Need char = 'c'
712 Capturing subpattern count = 0
717 Subject length lower bound = 1
718 @@ -6299,7 +6299,7 @@ Capturing subpattern count = 3
719 Named capturing subpatterns:
722 -Options: anchored dupnames
724 Duplicate name status changes
730 From db1fb68feddc9afe6f8822d099fa9ff25e3ea8e7 Mon Sep 17 00:00:00 2001
731 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
732 Date: Sat, 5 Dec 2015 16:30:14 +0000
733 Subject: [PATCH] Fix copy named substring bug.
735 Content-Type: text/plain; charset=UTF-8
736 Content-Transfer-Encoding: 8bit
738 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1618 2f5784b3-3f2a-0410-8824-cb99058d5e15
740 Petr Písař: Ported to 8.38.
741 diff --git a/pcre_get.c b/pcre_get.c
742 index 8094b34..41eda9c 100644
745 @@ -250,6 +250,7 @@ Arguments:
746 code the compiled regex
747 stringname the name of the capturing substring
748 ovector the vector of matched substrings
749 + stringcount number of captured substrings
751 Returns: the number of the first that is set,
752 or the number of the last one if none are set,
753 @@ -258,13 +259,16 @@ Returns: the number of the first that is set,
755 #if defined COMPILE_PCRE8
757 -get_first_set(const pcre *code, const char *stringname, int *ovector)
758 +get_first_set(const pcre *code, const char *stringname, int *ovector,
760 #elif defined COMPILE_PCRE16
762 -get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
763 +get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
765 #elif defined COMPILE_PCRE32
767 -get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
768 +get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
772 const REAL_PCRE *re = (const REAL_PCRE *)code;
773 @@ -295,7 +299,7 @@ if (entrysize <= 0) return entrysize;
774 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
776 int n = GET2(entry, 0);
777 - if (ovector[n*2] >= 0) return n;
778 + if (n < stringcount && ovector[n*2] >= 0) return n;
780 return GET2(entry, 0);
782 @@ -402,7 +406,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
783 PCRE_UCHAR32 *buffer, int size)
786 -int n = get_first_set(code, stringname, ovector);
787 +int n = get_first_set(code, stringname, ovector, stringcount);
788 if (n <= 0) return n;
789 #if defined COMPILE_PCRE8
790 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
791 @@ -619,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
792 PCRE_SPTR32 *stringptr)
795 -int n = get_first_set(code, stringname, ovector);
796 +int n = get_first_set(code, stringname, ovector, stringcount);
797 if (n <= 0) return n;
798 #if defined COMPILE_PCRE8
799 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
800 diff --git a/testdata/testinput2 b/testdata/testinput2
801 index 3a1134f..00ffe32 100644
802 --- a/testdata/testinput2
803 +++ b/testdata/testinput2
804 @@ -4229,4 +4229,7 @@ backtracking verbs. --/
808 +/(?<A>)(?J:(?<B>)(?<B>))(?<C>)/
811 /-- End of testinput2 --/
812 diff --git a/testdata/testoutput2 b/testdata/testoutput2
813 index 6c42897..ffb4466 100644
814 --- a/testdata/testoutput2
815 +++ b/testdata/testoutput2
816 @@ -14639,4 +14639,9 @@ No match
818 ------------------------------------------------------------------
820 +/(?<A>)(?J:(?<B>)(?<B>))(?<C>)/
822 +Matched, but too many substrings
823 +copy substring C failed -7
825 /-- End of testinput2 --/
829 From 40363ebc19baeab160abaaa55dc84322a89ac35a Mon Sep 17 00:00:00 2001
830 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
831 Date: Sat, 5 Dec 2015 16:58:46 +0000
832 Subject: [PATCH] Fix (by hacking) another length computation issue.
834 Content-Type: text/plain; charset=UTF-8
835 Content-Transfer-Encoding: 8bit
837 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1619 2f5784b3-3f2a-0410-8824-cb99058d5e15
839 Petr Písař: Ported to 8.38.
841 diff --git a/pcre_compile.c b/pcre_compile.c
842 index 57719b9..087bf2a 100644
845 @@ -7280,7 +7280,7 @@ for (;; ptr++)
846 issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
847 only mode, we finesse the bug by allowing more memory always. */
849 - *lengthptr += 2 + 2*LINK_SIZE;
850 + *lengthptr += 4 + 4*LINK_SIZE;
852 /* It is even worse than that. The current reference may be to an
853 existing named group with a different number (so apparently not
854 diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
855 index 9a0a12d..280692e 100644
856 --- a/testdata/testoutput11-16
857 +++ b/testdata/testoutput11-16
858 @@ -231,7 +231,7 @@ Memory allocation (code space): 73
859 ------------------------------------------------------------------
861 /(?P<a>a)...(?P=a)bbb(?P>a)d/BM
862 -Memory allocation (code space): 77
863 +Memory allocation (code space): 93
864 ------------------------------------------------------------------
867 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
868 index 57e5da0..cdbda74 100644
869 --- a/testdata/testoutput11-32
870 +++ b/testdata/testoutput11-32
871 @@ -231,7 +231,7 @@ Memory allocation (code space): 155
872 ------------------------------------------------------------------
874 /(?P<a>a)...(?P=a)bbb(?P>a)d/BM
875 -Memory allocation (code space): 157
876 +Memory allocation (code space): 189
877 ------------------------------------------------------------------
880 diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8
881 index 748548a..cb37896 100644
882 --- a/testdata/testoutput11-8
883 +++ b/testdata/testoutput11-8
884 @@ -231,7 +231,7 @@ Memory allocation (code space): 45
885 ------------------------------------------------------------------
887 /(?P<a>a)...(?P=a)bbb(?P>a)d/BM
888 -Memory allocation (code space): 50
889 +Memory allocation (code space): 62
890 ------------------------------------------------------------------
896 From 4f47274a2eb10131d88145ad7fd0eed4027a0c51 Mon Sep 17 00:00:00 2001
897 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
898 Date: Tue, 8 Dec 2015 11:06:40 +0000
899 Subject: [PATCH] Fix get_substring_list() bug when \K is used in an assertion.
901 Content-Type: text/plain; charset=UTF-8
902 Content-Transfer-Encoding: 8bit
904 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1620 2f5784b3-3f2a-0410-8824-cb99058d5e15
906 Petr Písař: ported to 8.38.
908 diff --git a/pcre_get.c b/pcre_get.c
909 index 41eda9c..cdd2abc 100644
912 @@ -461,7 +461,10 @@ pcre_uchar **stringlist;
915 for (i = 0; i < double_count; i += 2)
916 - size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
918 + size += sizeof(pcre_uchar *) + IN_UCHARS(1);
919 + if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
922 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
923 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
924 @@ -477,7 +480,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1);
926 for (i = 0; i < double_count; i += 2)
928 - int len = ovector[i+1] - ovector[i];
929 + int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
930 memcpy(p, subject + ovector[i], IN_UCHARS(len));
933 diff --git a/testdata/testinput2 b/testdata/testinput2
934 index 00ffe32..967a241 100644
935 --- a/testdata/testinput2
936 +++ b/testdata/testinput2
937 @@ -4232,4 +4232,7 @@ backtracking verbs. --/
938 /(?<A>)(?J:(?<B>)(?<B>))(?<C>)/
942 + ring bpattingbobnd $ 1,oern cou \rb\L
944 /-- End of testinput2 --/
945 diff --git a/testdata/testoutput2 b/testdata/testoutput2
946 index ffb4466..5fb28d5 100644
947 --- a/testdata/testoutput2
948 +++ b/testdata/testoutput2
949 @@ -14644,4 +14644,10 @@ No match
950 Matched, but too many substrings
951 copy substring C failed -7
954 + ring bpattingbobnd $ 1,oern cou \rb\L
955 +Start of matched string is beyond its end - displaying from end to start.
959 /-- End of testinput2 --/
963 From 3da5528b47b88c32224cf9d14d8a4e80cd7a0815 Mon Sep 17 00:00:00 2001
964 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
965 Date: Sat, 6 Feb 2016 16:54:14 +0000
966 Subject: [PATCH] Fix pcretest bad behaviour for callout in lookbehind.
968 Content-Type: text/plain; charset=UTF-8
969 Content-Transfer-Encoding: 8bit
971 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1625 2f5784b3-3f2a-0410-8824-cb99058d5e15
973 Petr Písař: Ported to 8.38.
975 diff --git a/pcretest.c b/pcretest.c
976 index 488e419..63869fd 100644
979 @@ -2250,7 +2250,7 @@ data is not zero. */
980 static int callout(pcre_callout_block *cb)
982 FILE *f = (first_callout | callout_extra)? outfile : NULL;
983 -int i, pre_start, post_start, subject_length;
984 +int i, current_position, pre_start, post_start, subject_length;
988 @@ -2280,14 +2280,19 @@ printed lengths of the substrings. */
990 if (f != NULL) fprintf(f, "--->");
992 +/* If a lookbehind is involved, the current position may be earlier than the
993 +match start. If so, use the match start instead. */
995 +current_position = (cb->current_position >= cb->start_match)?
996 + cb->current_position : cb->start_match;
998 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
999 PCHARS(post_start, cb->subject, cb->start_match,
1000 - cb->current_position - cb->start_match, f);
1001 + current_position - cb->start_match, f);
1003 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1005 -PCHARSV(cb->subject, cb->current_position,
1006 - cb->subject_length - cb->current_position, f);
1007 +PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
1009 if (f != NULL) fprintf(f, "\n");
1011 @@ -5740,3 +5745,4 @@ return yield;
1014 /* End of pcretest.c */
1016 diff --git a/testdata/testinput2 b/testdata/testinput2
1017 index 967a241..086e0f4 100644
1018 --- a/testdata/testinput2
1019 +++ b/testdata/testinput2
1020 @@ -4235,4 +4235,8 @@ backtracking verbs. --/
1022 ring bpattingbobnd $ 1,oern cou \rb\L
1028 /-- End of testinput2 --/
1029 diff --git a/testdata/testoutput2 b/testdata/testoutput2
1030 index 5fb28d5..d414a72 100644
1031 --- a/testdata/testoutput2
1032 +++ b/testdata/testoutput2
1033 @@ -14650,4 +14650,19 @@ Start of matched string is beyond its end - displaying from end to start.
1052 /-- End of testinput2 --/
1056 From 943a5105b9fe2842851003f692c7077a6cdbeefe Mon Sep 17 00:00:00 2001
1057 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
1058 Date: Wed, 10 Feb 2016 19:13:17 +0000
1059 Subject: [PATCH] Fix workspace overflow for (*ACCEPT) with deeply nested
1062 Content-Type: text/plain; charset=UTF-8
1063 Content-Transfer-Encoding: 8bit
1065 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1631 2f5784b3-3f2a-0410-8824-cb99058d5e15
1067 Petr Písař: Ported to 8.38.
1069 diff --git a/pcre_compile.c b/pcre_compile.c
1070 index b9a239e..5019854 100644
1071 --- a/pcre_compile.c
1072 +++ b/pcre_compile.c
1074 and semantics are as close as possible to those of the Perl 5 language.
1076 Written by Philip Hazel
1077 - Copyright (c) 1997-2014 University of Cambridge
1078 + Copyright (c) 1997-2016 University of Cambridge
1080 -----------------------------------------------------------------------------
1081 Redistribution and use in source and binary forms, with or without
1082 @@ -560,6 +560,7 @@ static const char error_texts[] =
1084 "parentheses are too deeply nested (stack check)\0"
1085 "digits missing in \\x{} or \\o{}\0"
1086 + "regular expression is too complicated\0"
1089 /* Table to identify digits and hex digits. This is used when compiling
1090 @@ -4591,7 +4592,8 @@ for (;; ptr++)
1091 if (code > cd->start_workspace + cd->workspace_size -
1092 WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */
1094 - *errorcodeptr = ERR52;
1095 + *errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)?
1100 @@ -6626,8 +6628,21 @@ for (;; ptr++)
1101 cd->had_accept = TRUE;
1102 for (oc = cd->open_caps; oc != NULL; oc = oc->next)
1104 - *code++ = OP_CLOSE;
1105 - PUT2INC(code, 0, oc->number);
1106 + if (lengthptr != NULL)
1108 +#ifdef COMPILE_PCRE8
1109 + *lengthptr += 1 + IMM2_SIZE;
1110 +#elif defined COMPILE_PCRE16
1111 + *lengthptr += 2 + IMM2_SIZE;
1112 +#elif defined COMPILE_PCRE32
1113 + *lengthptr += 4 + IMM2_SIZE;
1118 + *code++ = OP_CLOSE;
1119 + PUT2INC(code, 0, oc->number);
1123 (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
1124 diff --git a/pcre_internal.h b/pcre_internal.h
1125 index f7a5ee7..dbfe80e 100644
1126 --- a/pcre_internal.h
1127 +++ b/pcre_internal.h
1129 and semantics are as close as possible to those of the Perl 5 language.
1131 Written by Philip Hazel
1132 - Copyright (c) 1997-2014 University of Cambridge
1133 + Copyright (c) 1997-2016 University of Cambridge
1135 -----------------------------------------------------------------------------
1136 Redistribution and use in source and binary forms, with or without
1137 @@ -2289,7 +2289,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
1138 ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
1139 ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
1140 ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
1141 - ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERRCOUNT };
1142 + ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };
1144 /* JIT compiling modes. The function list is indexed by them. */
1146 diff --git a/pcreposix.c b/pcreposix.c
1147 index dcc13ef..55b6ddc 100644
1151 and semantics are as close as possible to those of the Perl 5 language.
1153 Written by Philip Hazel
1154 - Copyright (c) 1997-2014 University of Cambridge
1155 + Copyright (c) 1997-2016 University of Cambridge
1157 -----------------------------------------------------------------------------
1158 Redistribution and use in source and binary forms, with or without
1159 @@ -173,7 +173,8 @@ static const int eint[] = {
1160 REG_BADPAT, /* group name must start with a non-digit */
1162 REG_BADPAT, /* parentheses too deeply nested (stack check) */
1163 - REG_BADPAT /* missing digits in \x{} or \o{} */
1164 + REG_BADPAT, /* missing digits in \x{} or \o{} */
1165 + REG_BADPAT /* pattern too complicated */
1168 /* Table of texts corresponding to POSIX error codes */
1169 diff --git a/testdata/testinput11 b/testdata/testinput11
1170 index ac9d228..6f0989a 100644
1171 --- a/testdata/testinput11
1172 +++ b/testdata/testinput11
1173 @@ -138,4 +138,6 @@ is required for these tests. --/
1177 +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
1179 /-- End of testinput11 --/
1180 diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
1181 index 280692e..3c485da 100644
1182 --- a/testdata/testoutput11-16
1183 +++ b/testdata/testoutput11-16
1184 @@ -765,4 +765,7 @@ Memory allocation (code space): 14
1186 ------------------------------------------------------------------
1188 +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
1189 +Failed: regular expression is too complicated at offset 490
1191 /-- End of testinput11 --/
1192 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
1193 index cdbda74..e19518d 100644
1194 --- a/testdata/testoutput11-32
1195 +++ b/testdata/testoutput11-32
1196 @@ -765,4 +765,7 @@ Memory allocation (code space): 28
1198 ------------------------------------------------------------------
1200 +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
1201 +Failed: missing ) at offset 509
1203 /-- End of testinput11 --/
1204 diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8
1205 index cb37896..5a4fbb2 100644
1206 --- a/testdata/testoutput11-8
1207 +++ b/testdata/testoutput11-8
1208 @@ -765,4 +765,7 @@ Memory allocation (code space): 10
1210 ------------------------------------------------------------------
1212 +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
1213 +Failed: missing ) at offset 509
1215 /-- End of testinput11 --/
1219 From b7537308b7c758f33c347cb0bec62754c43c271f Mon Sep 17 00:00:00 2001
1220 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
1221 Date: Sat, 27 Feb 2016 17:38:11 +0000
1222 Subject: [PATCH] Yet another duplicate name bugfix by overestimating the
1223 memory needed (i.e. another hack - PCRE2 has this "properly" fixed).
1225 Content-Type: text/plain; charset=UTF-8
1226 Content-Transfer-Encoding: 8bit
1228 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1636 2f5784b3-3f2a-0410-8824-cb99058d5e15
1230 Petr Písař: Ported to 8.38.
1232 diff --git a/pcre_compile.c b/pcre_compile.c
1233 index 5019854..4ffea0c 100644
1234 --- a/pcre_compile.c
1235 +++ b/pcre_compile.c
1236 @@ -7311,7 +7311,12 @@ for (;; ptr++)
1237 so far in order to get the number. If the name is not found, leave
1238 the value of recno as 0 for a forward reference. */
1241 + /* This patch (removing "else") fixes a problem when a reference is
1242 + to multiple identically named nested groups from within the nest.
1243 + Once again, it is not the "proper" fix, and it results in an
1244 + over-allocation of memory. */
1248 ng = cd->named_groups;
1249 for (i = 0; i < cd->names_found; i++, ng++)
1250 diff --git a/testdata/testinput2 b/testdata/testinput2
1251 index 086e0f4..c805f5f 100644
1252 --- a/testdata/testinput2
1253 +++ b/testdata/testinput2
1254 @@ -4239,4 +4239,6 @@ backtracking verbs. --/
1258 +/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
1260 /-- End of testinput2 --/
1261 diff --git a/testdata/testoutput2 b/testdata/testoutput2
1262 index d414a72..800a72f 100644
1263 --- a/testdata/testoutput2
1264 +++ b/testdata/testoutput2
1265 @@ -14665,4 +14665,6 @@ Start of matched string is beyond its end - displaying from end to start.
1269 +/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
1271 /-- End of testinput2 --/
1275 From 0fc2edb79b3815c6511fd75c36a57893e4acaee6 Mon Sep 17 00:00:00 2001
1276 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
1277 Date: Sat, 27 Feb 2016 17:55:24 +0000
1278 Subject: [PATCH] Fix pcretest loop for global matching with an ovector size less than 2.
1281 Content-Type: text/plain; charset=UTF-8
1282 Content-Transfer-Encoding: 8bit
1284 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1637 2f5784b3-3f2a-0410-8824-cb99058d5e15
1286 Petr Písař: Ported to 8.38.
1288 diff --git a/pcretest.c b/pcretest.c
1289 index 63869fd..78ef517 100644
1292 @@ -5617,6 +5617,12 @@ while (!done)
1296 + if (use_size_offsets < 2)
1298 + fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
1302 /* If we have matched an empty string, first check to see if we are at
1303 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
1304 Perl's /g options does. This turns out to be rather cunning. First we set
1308 From b3db1b7de5cfaa026ec2bc4a393129461a0f5c57 Mon Sep 17 00:00:00 2001
1309 From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
1310 Date: Sat, 27 Feb 2016 18:44:41 +0000
1311 Subject: [PATCH] Fix non-diagnosis of missing assertion after (?(?C).
1313 Content-Type: text/plain; charset=UTF-8
1314 Content-Transfer-Encoding: 8bit
1316 git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1638 2f5784b3-3f2a-0410-8824-cb99058d5e15
1318 Petr Písař: Ported to 8.38.
1320 diff --git a/pcre_compile.c b/pcre_compile.c
1321 index 4ffea0c..254c629 100644
1322 --- a/pcre_compile.c
1323 +++ b/pcre_compile.c
1324 @@ -485,7 +485,7 @@ static const char error_texts[] =
1325 "lookbehind assertion is not fixed length\0"
1326 "malformed number or name after (?(\0"
1327 "conditional group contains more than two branches\0"
1328 - "assertion expected after (?(\0"
1329 + "assertion expected after (?( or (?(?C)\0"
1330 "(?R or (?[+-]digits must be followed by )\0"
1332 "unknown POSIX class name\0"
1333 @@ -6771,6 +6771,15 @@ for (;; ptr++)
1334 for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
1335 if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
1338 + /* tempptr should now be pointing to the opening parenthesis of the
1339 + assertion condition. */
1341 + if (*tempptr != CHAR_LEFT_PARENTHESIS)
1343 + *errorcodeptr = ERR28;
1348 /* For conditions that are assertions, check the syntax, and then exit
1349 diff --git a/testdata/testinput2 b/testdata/testinput2
1350 index c805f5f..75e402e 100644
1351 --- a/testdata/testinput2
1352 +++ b/testdata/testinput2
1353 @@ -4241,4 +4241,6 @@ backtracking verbs. --/
1355 /((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
1359 /-- End of testinput2 --/
1360 diff --git a/testdata/testoutput2 b/testdata/testoutput2
1361 index 800a72f..5e88d1a 100644
1362 --- a/testdata/testoutput2
1363 +++ b/testdata/testoutput2
1364 @@ -555,13 +555,13 @@ Failed: malformed number or name after (?( at offset 4
1365 Failed: malformed number or name after (?( at offset 4
1368 -Failed: assertion expected after (?( at offset 3
1369 +Failed: assertion expected after (?( or (?(?C) at offset 3
1372 Failed: reference to non-existent subpattern at offset 7
1375 -Failed: assertion expected after (?( at offset 3
1376 +Failed: assertion expected after (?( or (?(?C) at offset 3
1379 Capturing subpattern count = 1
1380 @@ -7870,7 +7870,7 @@ No match
1381 Failed: malformed number or name after (?( at offset 6
1384 -Failed: assertion expected after (?( at offset 4
1385 +Failed: assertion expected after (?( or (?(?C) at offset 4
1388 Failed: reference to non-existent subpattern at offset 7
1389 @@ -14346,7 +14346,7 @@ No match
1393 -Failed: assertion expected after (?( at offset 3
1394 +Failed: assertion expected after (?( or (?(?C) at offset 3
1397 ------------------------------------------------------------------
1398 @@ -14667,4 +14667,7 @@ No match
1400 /((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
1403 +Failed: assertion expected after (?( or (?(?C) at offset 4
1405 /-- End of testinput2 --/