]>
Commit | Line | Data |
---|---|---|
c0705301 MF |
1 | Submitted By: Ken Moffat <ken at linuxfromscratch dot org> |
2 | Date: 2016-03-16 | |
3 | Initial Package Version: 8.38 | |
4 | Upstream Status: Applied | |
5 | Origin: Upstream, backported to 8.38 by Petr Písař at redhat | |
6 | Description: Various fixes, including for CVE-2016-1283 and many other | |
7 | bugs which have been fixed upstream. Many of these bugs were found by | |
8 | fuzzing. Upstream is trying to persuade its users to move to pcre2 and | |
9 | is giving low priority to further pcre1 maintenance releases. | |
10 | ||
11 | From 3c80e02cd464ea049e117b423fd48fab294c51a9 Mon Sep 17 00:00:00 2001 | |
12 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
13 | Date: Thu, 26 Nov 2015 20:29:13 +0000 | |
14 | Subject: [PATCH] Fix auto-callout (?# comment bug. | |
15 | MIME-Version: 1.0 | |
16 | Content-Type: text/plain; charset=UTF-8 | |
17 | Content-Transfer-Encoding: 8bit | |
18 | ||
19 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1611 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
20 | ||
21 | Petr Pisar: Ported to 8.38. | |
22 | ||
23 | diff --git a/pcre_compile.c b/pcre_compile.c | |
24 | index 4d3b313..3360a8b 100644 | |
25 | --- a/pcre_compile.c | |
26 | +++ b/pcre_compile.c | |
27 | @@ -4699,6 +4699,23 @@ for (;; ptr++) | |
28 | } | |
29 | } | |
30 | ||
31 | + /* Skip over (?# comments. We need to do this here because we want to know if | |
32 | + the next thing is a quantifier, and these comments may come between an item | |
33 | + and its quantifier. */ | |
34 | + | |
35 | + if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK && | |
36 | + ptr[2] == CHAR_NUMBER_SIGN) | |
37 | + { | |
38 | + ptr += 3; | |
39 | + while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; | |
40 | + if (*ptr == CHAR_NULL) | |
41 | + { | |
42 | + *errorcodeptr = ERR18; | |
43 | + goto FAILED; | |
44 | + } | |
45 | + continue; | |
46 | + } | |
47 | + | |
48 | /* See if the next thing is a quantifier. */ | |
49 | ||
50 | is_quantifier = | |
51 | @@ -6529,21 +6546,6 @@ for (;; ptr++) | |
52 | case CHAR_LEFT_PARENTHESIS: | |
53 | ptr++; | |
54 | ||
55 | - /* First deal with comments. Putting this code right at the start ensures | |
56 | - that comments have no bad side effects. */ | |
57 | - | |
58 | - if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) | |
59 | - { | |
60 | - ptr += 2; | |
61 | - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; | |
62 | - if (*ptr == CHAR_NULL) | |
63 | - { | |
64 | - *errorcodeptr = ERR18; | |
65 | - goto FAILED; | |
66 | - } | |
67 | - continue; | |
68 | - } | |
69 | - | |
70 | /* Now deal with various "verbs" that can be introduced by '*'. */ | |
71 | ||
72 | if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' | |
73 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
74 | index e2e520f..92e3359 100644 | |
75 | --- a/testdata/testinput2 | |
76 | +++ b/testdata/testinput2 | |
77 | @@ -4217,4 +4217,12 @@ backtracking verbs. --/ | |
78 | ||
79 | /a[[:punct:]b]/BZ | |
80 | ||
81 | +/L(?#(|++<!(2)?/BZ | |
82 | + | |
83 | +/L(?#(|++<!(2)?/BOZ | |
84 | + | |
85 | +/L(?#(|++<!(2)?/BCZ | |
86 | + | |
87 | +/L(?#(|++<!(2)?/BCOZ | |
88 | + | |
89 | /-- End of testinput2 --/ | |
90 | diff --git a/testdata/testinput7 b/testdata/testinput7 | |
91 | index e411a4b..00b9738 100644 | |
92 | --- a/testdata/testinput7 | |
93 | +++ b/testdata/testinput7 | |
94 | @@ -853,4 +853,8 @@ of case for anything other than the ASCII letters. --/ | |
95 | ||
96 | /a[b[:punct:]]/8WBZ | |
97 | ||
98 | +/L(?#(|++<!(2)?/B8COZ | |
99 | + | |
100 | +/L(?#(|++<!(2)?/B8WCZ | |
101 | + | |
102 | /-- End of testinput7 --/ | |
103 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
104 | index 85c565d..2cf7a90 100644 | |
105 | --- a/testdata/testoutput2 | |
106 | +++ b/testdata/testoutput2 | |
107 | @@ -14574,4 +14574,40 @@ No match | |
108 | End | |
109 | ------------------------------------------------------------------ | |
110 | ||
111 | +/L(?#(|++<!(2)?/BZ | |
112 | +------------------------------------------------------------------ | |
113 | + Bra | |
114 | + L?+ | |
115 | + Ket | |
116 | + End | |
117 | +------------------------------------------------------------------ | |
118 | + | |
119 | +/L(?#(|++<!(2)?/BOZ | |
120 | +------------------------------------------------------------------ | |
121 | + Bra | |
122 | + L? | |
123 | + Ket | |
124 | + End | |
125 | +------------------------------------------------------------------ | |
126 | + | |
127 | +/L(?#(|++<!(2)?/BCZ | |
128 | +------------------------------------------------------------------ | |
129 | + Bra | |
130 | + Callout 255 0 14 | |
131 | + L?+ | |
132 | + Callout 255 14 0 | |
133 | + Ket | |
134 | + End | |
135 | +------------------------------------------------------------------ | |
136 | + | |
137 | +/L(?#(|++<!(2)?/BCOZ | |
138 | +------------------------------------------------------------------ | |
139 | + Bra | |
140 | + Callout 255 0 14 | |
141 | + L? | |
142 | + Callout 255 14 0 | |
143 | + Ket | |
144 | + End | |
145 | +------------------------------------------------------------------ | |
146 | + | |
147 | /-- End of testinput2 --/ | |
148 | diff --git a/testdata/testoutput7 b/testdata/testoutput7 | |
149 | index cc9ebdd..fdfff64 100644 | |
150 | --- a/testdata/testoutput7 | |
151 | +++ b/testdata/testoutput7 | |
152 | @@ -2348,4 +2348,24 @@ No match | |
153 | End | |
154 | ------------------------------------------------------------------ | |
155 | ||
156 | +/L(?#(|++<!(2)?/B8COZ | |
157 | +------------------------------------------------------------------ | |
158 | + Bra | |
159 | + Callout 255 0 14 | |
160 | + L? | |
161 | + Callout 255 14 0 | |
162 | + Ket | |
163 | + End | |
164 | +------------------------------------------------------------------ | |
165 | + | |
166 | +/L(?#(|++<!(2)?/B8WCZ | |
167 | +------------------------------------------------------------------ | |
168 | + Bra | |
169 | + Callout 255 0 14 | |
170 | + L?+ | |
171 | + Callout 255 14 0 | |
172 | + Ket | |
173 | + End | |
174 | +------------------------------------------------------------------ | |
175 | + | |
176 | /-- End of testinput7 --/ | |
177 | -- | |
178 | 2.4.3 | |
179 | ||
180 | From ef6b10fcde41a2687f38d4a9ff2886b037948a1b Mon Sep 17 00:00:00 2001 | |
181 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
182 | Date: Fri, 27 Nov 2015 17:13:13 +0000 | |
183 | Subject: [PATCH 1/5] Fix negated POSIX class within negated overall class UCP | |
184 | bug. | |
185 | MIME-Version: 1.0 | |
186 | Content-Type: text/plain; charset=UTF-8 | |
187 | Content-Transfer-Encoding: 8bit | |
188 | ||
189 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1612 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
190 | ||
191 | Petr Písař: Ported to 8.38. | |
192 | diff --git a/pcre_compile.c b/pcre_compile.c | |
193 | index 3360a8b..3670f1e 100644 | |
194 | --- a/pcre_compile.c | |
195 | +++ b/pcre_compile.c | |
196 | @@ -5063,20 +5063,22 @@ for (;; ptr++) | |
197 | ptr = tempptr + 1; | |
198 | continue; | |
199 | ||
200 | - /* For the other POSIX classes (ascii, xdigit) we are going to fall | |
201 | - through to the non-UCP case and build a bit map for characters with | |
202 | - code points less than 256. If we are in a negated POSIX class | |
203 | - within a non-negated overall class, characters with code points | |
204 | - greater than 255 must all match. In the special case where we have | |
205 | - not yet generated any xclass data, and this is the final item in | |
206 | - the overall class, we need do nothing: later on, the opcode | |
207 | + /* For the other POSIX classes (ascii, cntrl, xdigit) we are going | |
208 | + to fall through to the non-UCP case and build a bit map for | |
209 | + characters with code points less than 256. If we are in a negated | |
210 | + POSIX class, characters with code points greater than 255 must | |
211 | + either all match or all not match. In the special case where we | |
212 | + have not yet generated any xclass data, and this is the final item | |
213 | + in the overall class, we need do nothing: later on, the opcode | |
214 | OP_NCLASS will be used to indicate that characters greater than 255 | |
215 | are acceptable. If we have already seen an xclass item or one may | |
216 | follow (we have to assume that it might if this is not the end of | |
217 | - the class), explicitly match all wide codepoints. */ | |
218 | + the class), explicitly list all wide codepoints, which will then | |
219 | + either not match or match, depending on whether the class is or is | |
220 | + not negated. */ | |
221 | ||
222 | default: | |
223 | - if (!negate_class && local_negate && | |
224 | + if (local_negate && | |
225 | (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET)) | |
226 | { | |
227 | *class_uchardata++ = XCL_RANGE; | |
228 | diff --git a/testdata/testinput6 b/testdata/testinput6 | |
229 | index aeb62a0..a178d3d 100644 | |
230 | --- a/testdata/testinput6 | |
231 | +++ b/testdata/testinput6 | |
232 | @@ -1553,4 +1553,13 @@ | |
233 | \x{200} | |
234 | \x{37e} | |
235 | ||
236 | +/[^[:^ascii:]\d]/8W | |
237 | + a | |
238 | + ~ | |
239 | + 0 | |
240 | + \a | |
241 | + \x{7f} | |
242 | + \x{389} | |
243 | + \x{20ac} | |
244 | + | |
245 | /-- End of testinput6 --/ | |
246 | diff --git a/testdata/testoutput6 b/testdata/testoutput6 | |
247 | index beb85aa..b64dc0d 100644 | |
248 | --- a/testdata/testoutput6 | |
249 | +++ b/testdata/testoutput6 | |
250 | @@ -2557,4 +2557,20 @@ No match | |
251 | \x{37e} | |
252 | 0: \x{37e} | |
253 | ||
254 | +/[^[:^ascii:]\d]/8W | |
255 | + a | |
256 | + 0: a | |
257 | + ~ | |
258 | + 0: ~ | |
259 | + 0 | |
260 | +No match | |
261 | + \a | |
262 | + 0: \x{07} | |
263 | + \x{7f} | |
264 | + 0: \x{7f} | |
265 | + \x{389} | |
266 | +No match | |
267 | + \x{20ac} | |
268 | +No match | |
269 | + | |
270 | /-- End of testinput6 --/ | |
271 | -- | |
272 | 2.4.3 | |
273 | ||
274 | From bfc1dfa660c24dc7a75108d934290e50d7db2719 Mon Sep 17 00:00:00 2001 | |
275 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
276 | Date: Fri, 27 Nov 2015 17:41:04 +0000 | |
277 | Subject: [PATCH 2/5] Fix bug for isolated \E between an item and its qualifier | |
278 | when auto callout is set. | |
279 | MIME-Version: 1.0 | |
280 | Content-Type: text/plain; charset=UTF-8 | |
281 | Content-Transfer-Encoding: 8bit | |
282 | ||
283 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1613 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
284 | ||
285 | Petr Písař: Ported to 8.38. | |
286 | ||
287 | diff --git a/pcre_compile.c b/pcre_compile.c | |
288 | index 3670f1e..5786cd3 100644 | |
289 | --- a/pcre_compile.c | |
290 | +++ b/pcre_compile.c | |
291 | @@ -4645,9 +4645,10 @@ for (;; ptr++) | |
292 | goto FAILED; | |
293 | } | |
294 | ||
295 | - /* If in \Q...\E, check for the end; if not, we have a literal */ | |
296 | + /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an | |
297 | + isolated \E is ignored. */ | |
298 | ||
299 | - if (inescq && c != CHAR_NULL) | |
300 | + if (c != CHAR_NULL) | |
301 | { | |
302 | if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) | |
303 | { | |
304 | @@ -4655,7 +4656,7 @@ for (;; ptr++) | |
305 | ptr++; | |
306 | continue; | |
307 | } | |
308 | - else | |
309 | + else if (inescq) | |
310 | { | |
311 | if (previous_callout != NULL) | |
312 | { | |
313 | @@ -4670,7 +4671,6 @@ for (;; ptr++) | |
314 | } | |
315 | goto NORMAL_CHAR; | |
316 | } | |
317 | - /* Control does not reach here. */ | |
318 | } | |
319 | ||
320 | /* In extended mode, skip white space and comments. We need a loop in order | |
321 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
322 | index 92e3359..e8ca4fe 100644 | |
323 | --- a/testdata/testinput2 | |
324 | +++ b/testdata/testinput2 | |
325 | @@ -4225,4 +4225,6 @@ backtracking verbs. --/ | |
326 | ||
327 | /L(?#(|++<!(2)?/BCOZ | |
328 | ||
329 | +/(A*)\E+/CBZ | |
330 | + | |
331 | /-- End of testinput2 --/ | |
332 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
333 | index 2cf7a90..09756b8 100644 | |
334 | --- a/testdata/testoutput2 | |
335 | +++ b/testdata/testoutput2 | |
336 | @@ -14610,4 +14610,18 @@ No match | |
337 | End | |
338 | ------------------------------------------------------------------ | |
339 | ||
340 | +/(A*)\E+/CBZ | |
341 | +------------------------------------------------------------------ | |
342 | + Bra | |
343 | + Callout 255 0 7 | |
344 | + SCBra 1 | |
345 | + Callout 255 1 2 | |
346 | + A* | |
347 | + Callout 255 3 0 | |
348 | + KetRmax | |
349 | + Callout 255 7 0 | |
350 | + Ket | |
351 | + End | |
352 | +------------------------------------------------------------------ | |
353 | + | |
354 | /-- End of testinput2 --/ | |
355 | -- | |
356 | 2.4.3 | |
357 | ||
358 | From 108377b836fc29a84f5286287629d96549b1c777 Mon Sep 17 00:00:00 2001 | |
359 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
360 | Date: Sun, 29 Nov 2015 17:38:25 +0000 | |
361 | Subject: [PATCH 3/5] Give error for regexec with pmatch=NULL and REG_STARTEND | |
362 | set. | |
363 | MIME-Version: 1.0 | |
364 | Content-Type: text/plain; charset=UTF-8 | |
365 | Content-Transfer-Encoding: 8bit | |
366 | ||
367 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1614 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
368 | ||
369 | Petr Písař: Ported to 8.38. | |
370 | ||
371 | diff --git a/pcreposix.c b/pcreposix.c | |
372 | index f024423..dcc13ef 100644 | |
373 | --- a/pcreposix.c | |
374 | +++ b/pcreposix.c | |
375 | @@ -364,6 +364,7 @@ start location rather than being passed as a PCRE "starting offset". */ | |
376 | ||
377 | if ((eflags & REG_STARTEND) != 0) | |
378 | { | |
379 | + if (pmatch == NULL) return REG_INVARG; | |
380 | so = pmatch[0].rm_so; | |
381 | eo = pmatch[0].rm_eo; | |
382 | } | |
383 | -- | |
384 | 2.4.3 | |
385 | ||
386 | From e347b40d5bb12f7ef1e632aa649571a107be7d8a Mon Sep 17 00:00:00 2001 | |
387 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
388 | Date: Sun, 29 Nov 2015 17:46:23 +0000 | |
389 | Subject: [PATCH 4/5] Allow for up to 32-bit numbers in the ordin() function in | |
390 | pcregrep. | |
391 | MIME-Version: 1.0 | |
392 | Content-Type: text/plain; charset=UTF-8 | |
393 | Content-Transfer-Encoding: 8bit | |
394 | ||
395 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1615 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
396 | ||
397 | Petr Písař: Ported to 8.38. | |
398 | ||
399 | diff --git a/pcregrep.c b/pcregrep.c | |
400 | index 64986b0..cd53c64 100644 | |
401 | --- a/pcregrep.c | |
402 | +++ b/pcregrep.c | |
403 | @@ -2437,7 +2437,7 @@ return options; | |
404 | static char * | |
405 | ordin(int n) | |
406 | { | |
407 | -static char buffer[8]; | |
408 | +static char buffer[14]; | |
409 | char *p = buffer; | |
410 | sprintf(p, "%d", n); | |
411 | while (*p != 0) p++; | |
412 | -- | |
413 | 2.4.3 | |
414 | ||
415 | From e78ad4264b16988b826bd2939a1781c1165a92d9 Mon Sep 17 00:00:00 2001 | |
416 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
417 | Date: Mon, 30 Nov 2015 17:44:45 +0000 | |
418 | Subject: [PATCH 5/5] Fix \Q\E before qualifier bug when auto callouts are | |
419 | enabled. | |
420 | MIME-Version: 1.0 | |
421 | Content-Type: text/plain; charset=UTF-8 | |
422 | Content-Transfer-Encoding: 8bit | |
423 | ||
424 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1616 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
425 | ||
426 | Petr Písař: Ported to 8.38. | |
427 | ||
428 | diff --git a/pcre_compile.c b/pcre_compile.c | |
429 | index 5786cd3..beed46b 100644 | |
430 | --- a/pcre_compile.c | |
431 | +++ b/pcre_compile.c | |
432 | @@ -4671,17 +4671,27 @@ for (;; ptr++) | |
433 | } | |
434 | goto NORMAL_CHAR; | |
435 | } | |
436 | + | |
437 | + /* Check for the start of a \Q...\E sequence. We must do this here rather | |
438 | + than later in case it is immediately followed by \E, which turns it into a | |
439 | + "do nothing" sequence. */ | |
440 | + | |
441 | + if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q) | |
442 | + { | |
443 | + inescq = TRUE; | |
444 | + ptr++; | |
445 | + continue; | |
446 | + } | |
447 | } | |
448 | ||
449 | - /* In extended mode, skip white space and comments. We need a loop in order | |
450 | - to check for more white space and more comments after a comment. */ | |
451 | + /* In extended mode, skip white space and comments. */ | |
452 | ||
453 | if ((options & PCRE_EXTENDED) != 0) | |
454 | { | |
455 | - for (;;) | |
456 | + const pcre_uchar *wscptr = ptr; | |
457 | + while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr); | |
458 | + if (c == CHAR_NUMBER_SIGN) | |
459 | { | |
460 | - while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr); | |
461 | - if (c != CHAR_NUMBER_SIGN) break; | |
462 | ptr++; | |
463 | while (*ptr != CHAR_NULL) | |
464 | { | |
465 | @@ -4695,7 +4705,15 @@ for (;; ptr++) | |
466 | if (utf) FORWARDCHAR(ptr); | |
467 | #endif | |
468 | } | |
469 | - c = *ptr; /* Either NULL or the char after a newline */ | |
470 | + } | |
471 | + | |
472 | + /* If we skipped any characters, restart the loop. Otherwise, we didn't see | |
473 | + a comment. */ | |
474 | + | |
475 | + if (ptr > wscptr) | |
476 | + { | |
477 | + ptr--; | |
478 | + continue; | |
479 | } | |
480 | } | |
481 | ||
482 | @@ -7900,16 +7918,6 @@ for (;; ptr++) | |
483 | c = ec; | |
484 | else | |
485 | { | |
486 | - if (escape == ESC_Q) /* Handle start of quoted string */ | |
487 | - { | |
488 | - if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) | |
489 | - ptr += 2; /* avoid empty string */ | |
490 | - else inescq = TRUE; | |
491 | - continue; | |
492 | - } | |
493 | - | |
494 | - if (escape == ESC_E) continue; /* Perl ignores an orphan \E */ | |
495 | - | |
496 | /* For metasequences that actually match a character, we disable the | |
497 | setting of a first character if it hasn't already been set. */ | |
498 | ||
499 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
500 | index e8ca4fe..3a1134f 100644 | |
501 | --- a/testdata/testinput2 | |
502 | +++ b/testdata/testinput2 | |
503 | @@ -4227,4 +4227,6 @@ backtracking verbs. --/ | |
504 | ||
505 | /(A*)\E+/CBZ | |
506 | ||
507 | +/()\Q\E*]/BCZ | |
508 | + | |
509 | /-- End of testinput2 --/ | |
510 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
511 | index 09756b8..ac33cc4 100644 | |
512 | --- a/testdata/testoutput2 | |
513 | +++ b/testdata/testoutput2 | |
514 | @@ -14624,4 +14624,19 @@ No match | |
515 | End | |
516 | ------------------------------------------------------------------ | |
517 | ||
518 | +/()\Q\E*]/BCZ | |
519 | +------------------------------------------------------------------ | |
520 | + Bra | |
521 | + Callout 255 0 7 | |
522 | + Brazero | |
523 | + SCBra 1 | |
524 | + Callout 255 1 0 | |
525 | + KetRmax | |
526 | + Callout 255 7 1 | |
527 | + ] | |
528 | + Callout 255 8 0 | |
529 | + Ket | |
530 | + End | |
531 | +------------------------------------------------------------------ | |
532 | + | |
533 | /-- End of testinput2 --/ | |
534 | -- | |
535 | 2.4.3 | |
536 | ||
537 | From 46ed1a703b067e5b679eacf6500a54dae35f8130 Mon Sep 17 00:00:00 2001 | |
538 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
539 | Date: Thu, 3 Dec 2015 17:05:40 +0000 | |
540 | Subject: [PATCH] Fix /x bug when pattern starts with white space and (?-x) | |
541 | MIME-Version: 1.0 | |
542 | Content-Type: text/plain; charset=UTF-8 | |
543 | Content-Transfer-Encoding: 8bit | |
544 | ||
545 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1617 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
546 | ||
547 | Petr Písař: Ported to 8.38. | |
548 | ||
549 | diff --git a/pcre_compile.c b/pcre_compile.c | |
550 | index beed46b..57719b9 100644 | |
551 | --- a/pcre_compile.c | |
552 | +++ b/pcre_compile.c | |
553 | @@ -7607,39 +7607,15 @@ for (;; ptr++) | |
554 | newoptions = (options | set) & (~unset); | |
555 | ||
556 | /* If the options ended with ')' this is not the start of a nested | |
557 | - group with option changes, so the options change at this level. If this | |
558 | - item is right at the start of the pattern, the options can be | |
559 | - abstracted and made external in the pre-compile phase, and ignored in | |
560 | - the compile phase. This can be helpful when matching -- for instance in | |
561 | - caseless checking of required bytes. | |
562 | - | |
563 | - If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are | |
564 | - definitely *not* at the start of the pattern because something has been | |
565 | - compiled. In the pre-compile phase, however, the code pointer can have | |
566 | - that value after the start, because it gets reset as code is discarded | |
567 | - during the pre-compile. However, this can happen only at top level - if | |
568 | - we are within parentheses, the starting BRA will still be present. At | |
569 | - any parenthesis level, the length value can be used to test if anything | |
570 | - has been compiled at that level. Thus, a test for both these conditions | |
571 | - is necessary to ensure we correctly detect the start of the pattern in | |
572 | - both phases. | |
573 | - | |
574 | + group with option changes, so the options change at this level. | |
575 | If we are not at the pattern start, reset the greedy defaults and the | |
576 | case value for firstchar and reqchar. */ | |
577 | ||
578 | if (*ptr == CHAR_RIGHT_PARENTHESIS) | |
579 | { | |
580 | - if (code == cd->start_code + 1 + LINK_SIZE && | |
581 | - (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE)) | |
582 | - { | |
583 | - cd->external_options = newoptions; | |
584 | - } | |
585 | - else | |
586 | - { | |
587 | - greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); | |
588 | - greedy_non_default = greedy_default ^ 1; | |
589 | - req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0; | |
590 | - } | |
591 | + greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); | |
592 | + greedy_non_default = greedy_default ^ 1; | |
593 | + req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0; | |
594 | ||
595 | /* Change options at this level, and pass them back for use | |
596 | in subsequent branches. */ | |
597 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
598 | index ac33cc4..6c42897 100644 | |
599 | --- a/testdata/testoutput2 | |
600 | +++ b/testdata/testoutput2 | |
601 | @@ -419,7 +419,7 @@ Need char = '>' | |
602 | ||
603 | /(?U)<.*>/I | |
604 | Capturing subpattern count = 0 | |
605 | -Options: ungreedy | |
606 | +No options | |
607 | First char = '<' | |
608 | Need char = '>' | |
609 | abc<def>ghi<klm>nop | |
610 | @@ -443,7 +443,7 @@ Need char = '=' | |
611 | ||
612 | /(?U)={3,}?/I | |
613 | Capturing subpattern count = 0 | |
614 | -Options: ungreedy | |
615 | +No options | |
616 | First char = '=' | |
617 | Need char = '=' | |
618 | abc========def | |
619 | @@ -477,7 +477,7 @@ Failed: lookbehind assertion is not fixed length at offset 12 | |
620 | ||
621 | /(?i)abc/I | |
622 | Capturing subpattern count = 0 | |
623 | -Options: caseless | |
624 | +No options | |
625 | First char = 'a' (caseless) | |
626 | Need char = 'c' (caseless) | |
627 | ||
628 | @@ -489,7 +489,7 @@ No need char | |
629 | ||
630 | /(?i)^1234/I | |
631 | Capturing subpattern count = 0 | |
632 | -Options: anchored caseless | |
633 | +Options: anchored | |
634 | No first char | |
635 | No need char | |
636 | ||
637 | @@ -502,7 +502,7 @@ No need char | |
638 | /(?s).*/I | |
639 | Capturing subpattern count = 0 | |
640 | May match empty string | |
641 | -Options: anchored dotall | |
642 | +Options: anchored | |
643 | No first char | |
644 | No need char | |
645 | ||
646 | @@ -516,7 +516,7 @@ Starting chars: a b c d | |
647 | ||
648 | /(?i)[abcd]/IS | |
649 | Capturing subpattern count = 0 | |
650 | -Options: caseless | |
651 | +No options | |
652 | No first char | |
653 | No need char | |
654 | Subject length lower bound = 1 | |
655 | @@ -524,7 +524,7 @@ Starting chars: A B C D a b c d | |
656 | ||
657 | /(?m)[xy]|(b|c)/IS | |
658 | Capturing subpattern count = 1 | |
659 | -Options: multiline | |
660 | +No options | |
661 | No first char | |
662 | No need char | |
663 | Subject length lower bound = 1 | |
664 | @@ -538,7 +538,7 @@ No need char | |
665 | ||
666 | /(?i)(^a|^b)/Im | |
667 | Capturing subpattern count = 1 | |
668 | -Options: caseless multiline | |
669 | +Options: multiline | |
670 | First char at start or follows newline | |
671 | No need char | |
672 | ||
673 | @@ -1179,7 +1179,7 @@ No need char | |
674 | End | |
675 | ------------------------------------------------------------------ | |
676 | Capturing subpattern count = 1 | |
677 | -Options: anchored dotall | |
678 | +Options: anchored | |
679 | No first char | |
680 | No need char | |
681 | ||
682 | @@ -2735,7 +2735,7 @@ No match | |
683 | End | |
684 | ------------------------------------------------------------------ | |
685 | Capturing subpattern count = 0 | |
686 | -Options: caseless extended | |
687 | +Options: extended | |
688 | First char = 'a' (caseless) | |
689 | Need char = 'c' (caseless) | |
690 | ||
691 | @@ -2748,7 +2748,7 @@ Need char = 'c' (caseless) | |
692 | End | |
693 | ------------------------------------------------------------------ | |
694 | Capturing subpattern count = 0 | |
695 | -Options: caseless extended | |
696 | +Options: extended | |
697 | First char = 'a' (caseless) | |
698 | Need char = 'c' (caseless) | |
699 | ||
700 | @@ -3095,7 +3095,7 @@ Need char = 'b' | |
701 | End | |
702 | ------------------------------------------------------------------ | |
703 | Capturing subpattern count = 0 | |
704 | -Options: ungreedy | |
705 | +No options | |
706 | First char = 'x' | |
707 | Need char = 'b' | |
708 | xaaaab | |
709 | @@ -3497,7 +3497,7 @@ Need char = 'c' | |
710 | ||
711 | /(?i)[ab]/IS | |
712 | Capturing subpattern count = 0 | |
713 | -Options: caseless | |
714 | +No options | |
715 | No first char | |
716 | No need char | |
717 | Subject length lower bound = 1 | |
718 | @@ -6299,7 +6299,7 @@ Capturing subpattern count = 3 | |
719 | Named capturing subpatterns: | |
720 | A 2 | |
721 | A 3 | |
722 | -Options: anchored dupnames | |
723 | +Options: anchored | |
724 | Duplicate name status changes | |
725 | No first char | |
726 | No need char | |
727 | -- | |
728 | 2.4.3 | |
729 | ||
730 | From db1fb68feddc9afe6f8822d099fa9ff25e3ea8e7 Mon Sep 17 00:00:00 2001 | |
731 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
732 | Date: Sat, 5 Dec 2015 16:30:14 +0000 | |
733 | Subject: [PATCH] Fix copy named substring bug. | |
734 | MIME-Version: 1.0 | |
735 | Content-Type: text/plain; charset=UTF-8 | |
736 | Content-Transfer-Encoding: 8bit | |
737 | ||
738 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1618 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
739 | ||
740 | Petr Písař: Ported to 8.38. | |
741 | diff --git a/pcre_get.c b/pcre_get.c | |
742 | index 8094b34..41eda9c 100644 | |
743 | --- a/pcre_get.c | |
744 | +++ b/pcre_get.c | |
745 | @@ -250,6 +250,7 @@ Arguments: | |
746 | code the compiled regex | |
747 | stringname the name of the capturing substring | |
748 | ovector the vector of matched substrings | |
749 | + stringcount number of captured substrings | |
750 | ||
751 | Returns: the number of the first that is set, | |
752 | or the number of the last one if none are set, | |
753 | @@ -258,13 +259,16 @@ Returns: the number of the first that is set, | |
754 | ||
755 | #if defined COMPILE_PCRE8 | |
756 | static int | |
757 | -get_first_set(const pcre *code, const char *stringname, int *ovector) | |
758 | +get_first_set(const pcre *code, const char *stringname, int *ovector, | |
759 | + int stringcount) | |
760 | #elif defined COMPILE_PCRE16 | |
761 | static int | |
762 | -get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector) | |
763 | +get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector, | |
764 | + int stringcount) | |
765 | #elif defined COMPILE_PCRE32 | |
766 | static int | |
767 | -get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector) | |
768 | +get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector, | |
769 | + int stringcount) | |
770 | #endif | |
771 | { | |
772 | const REAL_PCRE *re = (const REAL_PCRE *)code; | |
773 | @@ -295,7 +299,7 @@ if (entrysize <= 0) return entrysize; | |
774 | for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) | |
775 | { | |
776 | int n = GET2(entry, 0); | |
777 | - if (ovector[n*2] >= 0) return n; | |
778 | + if (n < stringcount && ovector[n*2] >= 0) return n; | |
779 | } | |
780 | return GET2(entry, 0); | |
781 | } | |
782 | @@ -402,7 +406,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject, | |
783 | PCRE_UCHAR32 *buffer, int size) | |
784 | #endif | |
785 | { | |
786 | -int n = get_first_set(code, stringname, ovector); | |
787 | +int n = get_first_set(code, stringname, ovector, stringcount); | |
788 | if (n <= 0) return n; | |
789 | #if defined COMPILE_PCRE8 | |
790 | return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); | |
791 | @@ -619,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject, | |
792 | PCRE_SPTR32 *stringptr) | |
793 | #endif | |
794 | { | |
795 | -int n = get_first_set(code, stringname, ovector); | |
796 | +int n = get_first_set(code, stringname, ovector, stringcount); | |
797 | if (n <= 0) return n; | |
798 | #if defined COMPILE_PCRE8 | |
799 | return pcre_get_substring(subject, ovector, stringcount, n, stringptr); | |
800 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
801 | index 3a1134f..00ffe32 100644 | |
802 | --- a/testdata/testinput2 | |
803 | +++ b/testdata/testinput2 | |
804 | @@ -4229,4 +4229,7 @@ backtracking verbs. --/ | |
805 | ||
806 | /()\Q\E*]/BCZ | |
807 | ||
808 | +/(?<A>)(?J:(?<B>)(?<B>))(?<C>)/ | |
809 | + \O\CC | |
810 | + | |
811 | /-- End of testinput2 --/ | |
812 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
813 | index 6c42897..ffb4466 100644 | |
814 | --- a/testdata/testoutput2 | |
815 | +++ b/testdata/testoutput2 | |
816 | @@ -14639,4 +14639,9 @@ No match | |
817 | End | |
818 | ------------------------------------------------------------------ | |
819 | ||
820 | +/(?<A>)(?J:(?<B>)(?<B>))(?<C>)/ | |
821 | + \O\CC | |
822 | +Matched, but too many substrings | |
823 | +copy substring C failed -7 | |
824 | + | |
825 | /-- End of testinput2 --/ | |
826 | -- | |
827 | 2.4.3 | |
828 | ||
829 | From 40363ebc19baeab160abaaa55dc84322a89ac35a Mon Sep 17 00:00:00 2001 | |
830 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
831 | Date: Sat, 5 Dec 2015 16:58:46 +0000 | |
832 | Subject: [PATCH] Fix (by hacking) another length computation issue. | |
833 | MIME-Version: 1.0 | |
834 | Content-Type: text/plain; charset=UTF-8 | |
835 | Content-Transfer-Encoding: 8bit | |
836 | ||
837 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1619 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
838 | ||
839 | Petr Písař: Ported to 8.38. | |
840 | ||
841 | diff --git a/pcre_compile.c b/pcre_compile.c | |
842 | index 57719b9..087bf2a 100644 | |
843 | --- a/pcre_compile.c | |
844 | +++ b/pcre_compile.c | |
845 | @@ -7280,7 +7280,7 @@ for (;; ptr++) | |
846 | issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance | |
847 | only mode, we finesse the bug by allowing more memory always. */ | |
848 | ||
849 | - *lengthptr += 2 + 2*LINK_SIZE; | |
850 | + *lengthptr += 4 + 4*LINK_SIZE; | |
851 | ||
852 | /* It is even worse than that. The current reference may be to an | |
853 | existing named group with a different number (so apparently not | |
854 | diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 | |
855 | index 9a0a12d..280692e 100644 | |
856 | --- a/testdata/testoutput11-16 | |
857 | +++ b/testdata/testoutput11-16 | |
858 | @@ -231,7 +231,7 @@ Memory allocation (code space): 73 | |
859 | ------------------------------------------------------------------ | |
860 | ||
861 | /(?P<a>a)...(?P=a)bbb(?P>a)d/BM | |
862 | -Memory allocation (code space): 77 | |
863 | +Memory allocation (code space): 93 | |
864 | ------------------------------------------------------------------ | |
865 | 0 24 Bra | |
866 | 2 5 CBra 1 | |
867 | diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 | |
868 | index 57e5da0..cdbda74 100644 | |
869 | --- a/testdata/testoutput11-32 | |
870 | +++ b/testdata/testoutput11-32 | |
871 | @@ -231,7 +231,7 @@ Memory allocation (code space): 155 | |
872 | ------------------------------------------------------------------ | |
873 | ||
874 | /(?P<a>a)...(?P=a)bbb(?P>a)d/BM | |
875 | -Memory allocation (code space): 157 | |
876 | +Memory allocation (code space): 189 | |
877 | ------------------------------------------------------------------ | |
878 | 0 24 Bra | |
879 | 2 5 CBra 1 | |
880 | diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 | |
881 | index 748548a..cb37896 100644 | |
882 | --- a/testdata/testoutput11-8 | |
883 | +++ b/testdata/testoutput11-8 | |
884 | @@ -231,7 +231,7 @@ Memory allocation (code space): 45 | |
885 | ------------------------------------------------------------------ | |
886 | ||
887 | /(?P<a>a)...(?P=a)bbb(?P>a)d/BM | |
888 | -Memory allocation (code space): 50 | |
889 | +Memory allocation (code space): 62 | |
890 | ------------------------------------------------------------------ | |
891 | 0 30 Bra | |
892 | 3 7 CBra 1 | |
893 | -- | |
894 | 2.4.3 | |
895 | ||
896 | From 4f47274a2eb10131d88145ad7fd0eed4027a0c51 Mon Sep 17 00:00:00 2001 | |
897 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
898 | Date: Tue, 8 Dec 2015 11:06:40 +0000 | |
899 | Subject: [PATCH] Fix get_substring_list() bug when \K is used in an assertion. | |
900 | MIME-Version: 1.0 | |
901 | Content-Type: text/plain; charset=UTF-8 | |
902 | Content-Transfer-Encoding: 8bit | |
903 | ||
904 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1620 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
905 | ||
906 | Petr Písař: Ported to 8.38. | |
907 | ||
908 | diff --git a/pcre_get.c b/pcre_get.c | |
909 | index 41eda9c..cdd2abc 100644 | |
910 | --- a/pcre_get.c | |
911 | +++ b/pcre_get.c | |
912 | @@ -461,7 +461,10 @@ pcre_uchar **stringlist; | |
913 | pcre_uchar *p; | |
914 | ||
915 | for (i = 0; i < double_count; i += 2) | |
916 | - size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1); | |
917 | + { | |
918 | + size += sizeof(pcre_uchar *) + IN_UCHARS(1); | |
919 | + if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]); | |
920 | + } | |
921 | ||
922 | stringlist = (pcre_uchar **)(PUBL(malloc))(size); | |
923 | if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; | |
924 | @@ -477,7 +480,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1); | |
925 | ||
926 | for (i = 0; i < double_count; i += 2) | |
927 | { | |
928 | - int len = ovector[i+1] - ovector[i]; | |
929 | + int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0; | |
930 | memcpy(p, subject + ovector[i], IN_UCHARS(len)); | |
931 | *stringlist++ = p; | |
932 | p += len; | |
933 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
934 | index 00ffe32..967a241 100644 | |
935 | --- a/testdata/testinput2 | |
936 | +++ b/testdata/testinput2 | |
937 | @@ -4232,4 +4232,7 @@ backtracking verbs. --/ | |
938 | /(?<A>)(?J:(?<B>)(?<B>))(?<C>)/ | |
939 | \O\CC | |
940 | ||
941 | +/(?=a\K)/ | |
942 | + ring bpattingbobnd $ 1,oern cou \rb\L | |
943 | + | |
944 | /-- End of testinput2 --/ | |
945 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
946 | index ffb4466..5fb28d5 100644 | |
947 | --- a/testdata/testoutput2 | |
948 | +++ b/testdata/testoutput2 | |
949 | @@ -14644,4 +14644,10 @@ No match | |
950 | Matched, but too many substrings | |
951 | copy substring C failed -7 | |
952 | ||
953 | +/(?=a\K)/ | |
954 | + ring bpattingbobnd $ 1,oern cou \rb\L | |
955 | +Start of matched string is beyond its end - displaying from end to start. | |
956 | + 0: a | |
957 | + 0L | |
958 | + | |
959 | /-- End of testinput2 --/ | |
960 | -- | |
961 | 2.5.0 | |
962 | ||
963 | From 3da5528b47b88c32224cf9d14d8a4e80cd7a0815 Mon Sep 17 00:00:00 2001 | |
964 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
965 | Date: Sat, 6 Feb 2016 16:54:14 +0000 | |
966 | Subject: [PATCH] Fix pcretest bad behaviour for callout in lookbehind. | |
967 | MIME-Version: 1.0 | |
968 | Content-Type: text/plain; charset=UTF-8 | |
969 | Content-Transfer-Encoding: 8bit | |
970 | ||
971 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1625 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
972 | ||
973 | Petr Písař: Ported to 8.38. | |
974 | ||
975 | diff --git a/pcretest.c b/pcretest.c | |
976 | index 488e419..63869fd 100644 | |
977 | --- a/pcretest.c | |
978 | +++ b/pcretest.c | |
979 | @@ -2250,7 +2250,7 @@ data is not zero. */ | |
980 | static int callout(pcre_callout_block *cb) | |
981 | { | |
982 | FILE *f = (first_callout | callout_extra)? outfile : NULL; | |
983 | -int i, pre_start, post_start, subject_length; | |
984 | +int i, current_position, pre_start, post_start, subject_length; | |
985 | ||
986 | if (callout_extra) | |
987 | { | |
988 | @@ -2280,14 +2280,19 @@ printed lengths of the substrings. */ | |
989 | ||
990 | if (f != NULL) fprintf(f, "--->"); | |
991 | ||
992 | +/* If a lookbehind is involved, the current position may be earlier than the | |
993 | +match start. If so, use the match start instead. */ | |
994 | + | |
995 | +current_position = (cb->current_position >= cb->start_match)? | |
996 | + cb->current_position : cb->start_match; | |
997 | + | |
998 | PCHARS(pre_start, cb->subject, 0, cb->start_match, f); | |
999 | PCHARS(post_start, cb->subject, cb->start_match, | |
1000 | - cb->current_position - cb->start_match, f); | |
1001 | + current_position - cb->start_match, f); | |
1002 | ||
1003 | PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL); | |
1004 | ||
1005 | -PCHARSV(cb->subject, cb->current_position, | |
1006 | - cb->subject_length - cb->current_position, f); | |
1007 | +PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f); | |
1008 | ||
1009 | if (f != NULL) fprintf(f, "\n"); | |
1010 | ||
1011 | @@ -5740,3 +5745,4 @@ return yield; | |
1012 | } | |
1013 | ||
1014 | /* End of pcretest.c */ | |
1015 | + | |
1016 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
1017 | index 967a241..086e0f4 100644 | |
1018 | --- a/testdata/testinput2 | |
1019 | +++ b/testdata/testinput2 | |
1020 | @@ -4235,4 +4235,8 @@ backtracking verbs. --/ | |
1021 | /(?=a\K)/ | |
1022 | ring bpattingbobnd $ 1,oern cou \rb\L | |
1023 | ||
1024 | +/(?<=((?C)0))/ | |
1025 | + 9010 | |
1026 | + abcd | |
1027 | + | |
1028 | /-- End of testinput2 --/ | |
1029 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
1030 | index 5fb28d5..d414a72 100644 | |
1031 | --- a/testdata/testoutput2 | |
1032 | +++ b/testdata/testoutput2 | |
1033 | @@ -14650,4 +14650,19 @@ Start of matched string is beyond its end - displaying from end to start. | |
1034 | 0: a | |
1035 | 0L | |
1036 | ||
1037 | +/(?<=((?C)0))/ | |
1038 | + 9010 | |
1039 | +--->9010 | |
1040 | + 0 ^ 0 | |
1041 | + 0 ^ 0 | |
1042 | + 0: | |
1043 | + 1: 0 | |
1044 | + abcd | |
1045 | +--->abcd | |
1046 | + 0 ^ 0 | |
1047 | + 0 ^ 0 | |
1048 | + 0 ^ 0 | |
1049 | + 0 ^ 0 | |
1050 | +No match | |
1051 | + | |
1052 | /-- End of testinput2 --/ | |
1053 | -- | |
1054 | 2.5.0 | |
1055 | ||
1056 | From 943a5105b9fe2842851003f692c7077a6cdbeefe Mon Sep 17 00:00:00 2001 | |
1057 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
1058 | Date: Wed, 10 Feb 2016 19:13:17 +0000 | |
1059 | Subject: [PATCH] Fix workspace overflow for (*ACCEPT) with deeply nested | |
1060 | parentheses. | |
1061 | MIME-Version: 1.0 | |
1062 | Content-Type: text/plain; charset=UTF-8 | |
1063 | Content-Transfer-Encoding: 8bit | |
1064 | ||
1065 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1631 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
1066 | ||
1067 | Petr Písař: Ported to 8.38. | |
1068 | ||
1069 | diff --git a/pcre_compile.c b/pcre_compile.c | |
1070 | index b9a239e..5019854 100644 | |
1071 | --- a/pcre_compile.c | |
1072 | +++ b/pcre_compile.c | |
1073 | @@ -6,7 +6,7 @@ | |
1074 | and semantics are as close as possible to those of the Perl 5 language. | |
1075 | ||
1076 | Written by Philip Hazel | |
1077 | - Copyright (c) 1997-2014 University of Cambridge | |
1078 | + Copyright (c) 1997-2016 University of Cambridge | |
1079 | ||
1080 | ----------------------------------------------------------------------------- | |
1081 | Redistribution and use in source and binary forms, with or without | |
1082 | @@ -560,6 +560,7 @@ static const char error_texts[] = | |
1083 | /* 85 */ | |
1084 | "parentheses are too deeply nested (stack check)\0" | |
1085 | "digits missing in \\x{} or \\o{}\0" | |
1086 | + "regular expression is too complicated\0" | |
1087 | ; | |
1088 | ||
1089 | /* Table to identify digits and hex digits. This is used when compiling | |
1090 | @@ -4591,7 +4592,8 @@ for (;; ptr++) | |
1091 | if (code > cd->start_workspace + cd->workspace_size - | |
1092 | WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ | |
1093 | { | |
1094 | - *errorcodeptr = ERR52; | |
1095 | + *errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)? | |
1096 | + ERR52 : ERR87; | |
1097 | goto FAILED; | |
1098 | } | |
1099 | ||
1100 | @@ -6626,8 +6628,21 @@ for (;; ptr++) | |
1101 | cd->had_accept = TRUE; | |
1102 | for (oc = cd->open_caps; oc != NULL; oc = oc->next) | |
1103 | { | |
1104 | - *code++ = OP_CLOSE; | |
1105 | - PUT2INC(code, 0, oc->number); | |
1106 | + if (lengthptr != NULL) | |
1107 | + { | |
1108 | +#ifdef COMPILE_PCRE8 | |
1109 | + *lengthptr += 1 + IMM2_SIZE; | |
1110 | +#elif defined COMPILE_PCRE16 | |
1111 | + *lengthptr += 2 + IMM2_SIZE; | |
1112 | +#elif defined COMPILE_PCRE32 | |
1113 | + *lengthptr += 4 + IMM2_SIZE; | |
1114 | +#endif | |
1115 | + } | |
1116 | + else | |
1117 | + { | |
1118 | + *code++ = OP_CLOSE; | |
1119 | + PUT2INC(code, 0, oc->number); | |
1120 | + } | |
1121 | } | |
1122 | setverb = *code++ = | |
1123 | (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; | |
1124 | diff --git a/pcre_internal.h b/pcre_internal.h | |
1125 | index f7a5ee7..dbfe80e 100644 | |
1126 | --- a/pcre_internal.h | |
1127 | +++ b/pcre_internal.h | |
1128 | @@ -7,7 +7,7 @@ | |
1129 | and semantics are as close as possible to those of the Perl 5 language. | |
1130 | ||
1131 | Written by Philip Hazel | |
1132 | - Copyright (c) 1997-2014 University of Cambridge | |
1133 | + Copyright (c) 1997-2016 University of Cambridge | |
1134 | ||
1135 | ----------------------------------------------------------------------------- | |
1136 | Redistribution and use in source and binary forms, with or without | |
1137 | @@ -2289,7 +2289,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, | |
1138 | ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, | |
1139 | ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, | |
1140 | ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, | |
1141 | - ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERRCOUNT }; | |
1142 | + ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT }; | |
1143 | ||
1144 | /* JIT compiling modes. The function list is indexed by them. */ | |
1145 | ||
1146 | diff --git a/pcreposix.c b/pcreposix.c | |
1147 | index dcc13ef..55b6ddc 100644 | |
1148 | --- a/pcreposix.c | |
1149 | +++ b/pcreposix.c | |
1150 | @@ -6,7 +6,7 @@ | |
1151 | and semantics are as close as possible to those of the Perl 5 language. | |
1152 | ||
1153 | Written by Philip Hazel | |
1154 | - Copyright (c) 1997-2014 University of Cambridge | |
1155 | + Copyright (c) 1997-2016 University of Cambridge | |
1156 | ||
1157 | ----------------------------------------------------------------------------- | |
1158 | Redistribution and use in source and binary forms, with or without | |
1159 | @@ -173,7 +173,8 @@ static const int eint[] = { | |
1160 | REG_BADPAT, /* group name must start with a non-digit */ | |
1161 | /* 85 */ | |
1162 | REG_BADPAT, /* parentheses too deeply nested (stack check) */ | |
1163 | - REG_BADPAT /* missing digits in \x{} or \o{} */ | |
1164 | + REG_BADPAT, /* missing digits in \x{} or \o{} */ | |
1165 | + REG_BADPAT /* pattern too complicated */ | |
1166 | }; | |
1167 | ||
1168 | /* Table of texts corresponding to POSIX error codes */ | |
1169 | diff --git a/testdata/testinput11 b/testdata/testinput11 | |
1170 | index ac9d228..6f0989a 100644 | |
1171 | --- a/testdata/testinput11 | |
1172 | +++ b/testdata/testinput11 | |
1173 | @@ -138,4 +138,6 @@ is required for these tests. --/ | |
1174 | ||
1175 | /.((?2)(?R)\1)()/B | |
1176 | ||
1177 | +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ | |
1178 | + | |
1179 | /-- End of testinput11 --/ | |
1180 | diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 | |
1181 | index 280692e..3c485da 100644 | |
1182 | --- a/testdata/testoutput11-16 | |
1183 | +++ b/testdata/testoutput11-16 | |
1184 | @@ -765,4 +765,7 @@ Memory allocation (code space): 14 | |
1185 | 25 End | |
1186 | ------------------------------------------------------------------ | |
1187 | ||
1188 | +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ | |
1189 | +Failed: regular expression is too complicated at offset 490 | |
1190 | + | |
1191 | /-- End of testinput11 --/ | |
1192 | diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 | |
1193 | index cdbda74..e19518d 100644 | |
1194 | --- a/testdata/testoutput11-32 | |
1195 | +++ b/testdata/testoutput11-32 | |
1196 | @@ -765,4 +765,7 @@ Memory allocation (code space): 28 | |
1197 | 25 End | |
1198 | ------------------------------------------------------------------ | |
1199 | ||
1200 | +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ | |
1201 | +Failed: missing ) at offset 509 | |
1202 | + | |
1203 | /-- End of testinput11 --/ | |
1204 | diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 | |
1205 | index cb37896..5a4fbb2 100644 | |
1206 | --- a/testdata/testoutput11-8 | |
1207 | +++ b/testdata/testoutput11-8 | |
1208 | @@ -765,4 +765,7 @@ Memory allocation (code space): 10 | |
1209 | 38 End | |
1210 | ------------------------------------------------------------------ | |
1211 | ||
1212 | +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ | |
1213 | +Failed: missing ) at offset 509 | |
1214 | + | |
1215 | /-- End of testinput11 --/ | |
1216 | -- | |
1217 | 2.5.0 | |
1218 | ||
1219 | From b7537308b7c758f33c347cb0bec62754c43c271f Mon Sep 17 00:00:00 2001 | |
1220 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
1221 | Date: Sat, 27 Feb 2016 17:38:11 +0000 | |
1222 | Subject: [PATCH] Yet another duplicate name bugfix by overestimating the | |
1223 | memory needed (i.e. another hack - PCRE2 has this "properly" fixed). | |
1224 | MIME-Version: 1.0 | |
1225 | Content-Type: text/plain; charset=UTF-8 | |
1226 | Content-Transfer-Encoding: 8bit | |
1227 | ||
1228 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1636 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
1229 | ||
1230 | Petr Písař: Ported to 8.38. | |
1231 | ||
1232 | diff --git a/pcre_compile.c b/pcre_compile.c | |
1233 | index 5019854..4ffea0c 100644 | |
1234 | --- a/pcre_compile.c | |
1235 | +++ b/pcre_compile.c | |
1236 | @@ -7311,7 +7311,12 @@ for (;; ptr++) | |
1237 | so far in order to get the number. If the name is not found, leave | |
1238 | the value of recno as 0 for a forward reference. */ | |
1239 | ||
1240 | - else | |
1241 | + /* This patch (removing "else") fixes a problem when a reference is | |
1242 | + to multiple identically named nested groups from within the nest. | |
1243 | + Once again, it is not the "proper" fix, and it results in an | |
1244 | + over-allocation of memory. */ | |
1245 | + | |
1246 | + /* else */ | |
1247 | { | |
1248 | ng = cd->named_groups; | |
1249 | for (i = 0; i < cd->names_found; i++, ng++) | |
1250 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
1251 | index 086e0f4..c805f5f 100644 | |
1252 | --- a/testdata/testinput2 | |
1253 | +++ b/testdata/testinput2 | |
1254 | @@ -4239,4 +4239,6 @@ backtracking verbs. --/ | |
1255 | 9010 | |
1256 | abcd | |
1257 | ||
1258 | +/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ | |
1259 | + | |
1260 | /-- End of testinput2 --/ | |
1261 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
1262 | index d414a72..800a72f 100644 | |
1263 | --- a/testdata/testoutput2 | |
1264 | +++ b/testdata/testoutput2 | |
1265 | @@ -14665,4 +14665,6 @@ Start of matched string is beyond its end - displaying from end to start. | |
1266 | 0 ^ 0 | |
1267 | No match | |
1268 | ||
1269 | +/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ | |
1270 | + | |
1271 | /-- End of testinput2 --/ | |
1272 | -- | |
1273 | 2.5.0 | |
1274 | ||
1275 | From 0fc2edb79b3815c6511fd75c36a57893e4acaee6 Mon Sep 17 00:00:00 2001 | |
1276 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
1277 | Date: Sat, 27 Feb 2016 17:55:24 +0000 | |
1278 | Subject: [PATCH] Fix pcretest loop for global matching with an ovector size | |
1279 | less than 2. | |
1280 | MIME-Version: 1.0 | |
1281 | Content-Type: text/plain; charset=UTF-8 | |
1282 | Content-Transfer-Encoding: 8bit | |
1283 | ||
1284 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1637 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
1285 | ||
1286 | Petr Písař: Ported to 8.38. | |
1287 | ||
1288 | diff --git a/pcretest.c b/pcretest.c | |
1289 | index 63869fd..78ef517 100644 | |
1290 | --- a/pcretest.c | |
1291 | +++ b/pcretest.c | |
1292 | @@ -5617,6 +5617,12 @@ while (!done) | |
1293 | break; | |
1294 | } | |
1295 | ||
1296 | + if (use_size_offsets < 2) | |
1297 | + { | |
1298 | + fprintf(outfile, "Cannot do global matching with an ovector size < 2\n"); | |
1299 | + break; | |
1300 | + } | |
1301 | + | |
1302 | /* If we have matched an empty string, first check to see if we are at | |
1303 | the end of the subject. If so, the /g loop is over. Otherwise, mimic what | |
1304 | Perl's /g options does. This turns out to be rather cunning. First we set | |
1305 | -- | |
1306 | 2.5.0 | |
1307 | ||
1308 | From b3db1b7de5cfaa026ec2bc4a393129461a0f5c57 Mon Sep 17 00:00:00 2001 | |
1309 | From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | |
1310 | Date: Sat, 27 Feb 2016 18:44:41 +0000 | |
1311 | Subject: [PATCH] Fix non-diagnosis of missing assertion after (?(?C). | |
1312 | MIME-Version: 1.0 | |
1313 | Content-Type: text/plain; charset=UTF-8 | |
1314 | Content-Transfer-Encoding: 8bit | |
1315 | ||
1316 | git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1638 2f5784b3-3f2a-0410-8824-cb99058d5e15 | |
1317 | ||
1318 | Petr Písař: Ported to 8.38. | |
1319 | ||
1320 | diff --git a/pcre_compile.c b/pcre_compile.c | |
1321 | index 4ffea0c..254c629 100644 | |
1322 | --- a/pcre_compile.c | |
1323 | +++ b/pcre_compile.c | |
1324 | @@ -485,7 +485,7 @@ static const char error_texts[] = | |
1325 | "lookbehind assertion is not fixed length\0" | |
1326 | "malformed number or name after (?(\0" | |
1327 | "conditional group contains more than two branches\0" | |
1328 | - "assertion expected after (?(\0" | |
1329 | + "assertion expected after (?( or (?(?C)\0" | |
1330 | "(?R or (?[+-]digits must be followed by )\0" | |
1331 | /* 30 */ | |
1332 | "unknown POSIX class name\0" | |
1333 | @@ -6771,6 +6771,15 @@ for (;; ptr++) | |
1334 | for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; | |
1335 | if (ptr[i] == CHAR_RIGHT_PARENTHESIS) | |
1336 | tempptr += i + 1; | |
1337 | + | |
1338 | + /* tempptr should now be pointing to the opening parenthesis of the | |
1339 | + assertion condition. */ | |
1340 | + | |
1341 | + if (*tempptr != CHAR_LEFT_PARENTHESIS) | |
1342 | + { | |
1343 | + *errorcodeptr = ERR28; | |
1344 | + goto FAILED; | |
1345 | + } | |
1346 | } | |
1347 | ||
1348 | /* For conditions that are assertions, check the syntax, and then exit | |
1349 | diff --git a/testdata/testinput2 b/testdata/testinput2 | |
1350 | index c805f5f..75e402e 100644 | |
1351 | --- a/testdata/testinput2 | |
1352 | +++ b/testdata/testinput2 | |
1353 | @@ -4241,4 +4241,6 @@ backtracking verbs. --/ | |
1354 | ||
1355 | /((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ | |
1356 | ||
1357 | +/\N(?(?C)0?!.)*/ | |
1358 | + | |
1359 | /-- End of testinput2 --/ | |
1360 | diff --git a/testdata/testoutput2 b/testdata/testoutput2 | |
1361 | index 800a72f..5e88d1a 100644 | |
1362 | --- a/testdata/testoutput2 | |
1363 | +++ b/testdata/testoutput2 | |
1364 | @@ -555,13 +555,13 @@ Failed: malformed number or name after (?( at offset 4 | |
1365 | Failed: malformed number or name after (?( at offset 4 | |
1366 | ||
1367 | /(?(?i))/ | |
1368 | -Failed: assertion expected after (?( at offset 3 | |
1369 | +Failed: assertion expected after (?( or (?(?C) at offset 3 | |
1370 | ||
1371 | /(?(abc))/ | |
1372 | Failed: reference to non-existent subpattern at offset 7 | |
1373 | ||
1374 | /(?(?<ab))/ | |
1375 | -Failed: assertion expected after (?( at offset 3 | |
1376 | +Failed: assertion expected after (?( or (?(?C) at offset 3 | |
1377 | ||
1378 | /((?s)blah)\s+\1/I | |
1379 | Capturing subpattern count = 1 | |
1380 | @@ -7870,7 +7870,7 @@ No match | |
1381 | Failed: malformed number or name after (?( at offset 6 | |
1382 | ||
1383 | /(?(''))/ | |
1384 | -Failed: assertion expected after (?( at offset 4 | |
1385 | +Failed: assertion expected after (?( or (?(?C) at offset 4 | |
1386 | ||
1387 | /(?('R')stuff)/ | |
1388 | Failed: reference to non-existent subpattern at offset 7 | |
1389 | @@ -14346,7 +14346,7 @@ No match | |
1390 | "((?2)+)((?1))" | |
1391 | ||
1392 | "(?(?<E>.*!.*)?)" | |
1393 | -Failed: assertion expected after (?( at offset 3 | |
1394 | +Failed: assertion expected after (?( or (?(?C) at offset 3 | |
1395 | ||
1396 | "X((?2)()*+){2}+"BZ | |
1397 | ------------------------------------------------------------------ | |
1398 | @@ -14667,4 +14667,7 @@ No match | |
1399 | ||
1400 | /((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ | |
1401 | ||
1402 | +/\N(?(?C)0?!.)*/ | |
1403 | +Failed: assertion expected after (?( or (?(?C) at offset 4 | |
1404 | + | |
1405 | /-- End of testinput2 --/ | |
1406 | -- | |
1407 | 2.5.0 | |
1408 |