From: Paul Eggert Date: Tue, 26 Jan 2021 17:23:54 +0000 (-0800) Subject: expr: fix bug with unmatched \(...\) X-Git-Tag: v9.0~153 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=735083ba24878075235007b4417982ad5700436d;p=thirdparty%2Fcoreutils.git expr: fix bug with unmatched \(...\) Problem reported by Qiuhao Li. * NEWS: Mention this. * doc/coreutils.texi (String expressions): Document the correct behavior, which POSIX requires. * src/expr.c (docolon): Treat unmatched \(...\) as empty. * tests/misc/expr.pl: New test. --- diff --git a/NEWS b/NEWS index 0929f0ae2f..351a2983aa 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ GNU coreutils NEWS -*- outline -*- heavily changed during the run. [bug introduced in coreutils-8.25] + expr no longer mishandles unmatched \(...\) in regular expressions. + [bug introduced in coreutils-6.0] + ls no longer crashes when printing the SELinux context for unstatable files. [bug introduced in coreutils-6.9.91] diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 94c9fbfa56..c90c4d5128 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -13559,12 +13559,14 @@ second is considered to be a (basic, a la GNU @code{grep}) regular expression, with a @code{^} implicitly prepended. The first argument is then matched against this regular expression. -If the match succeeds and @var{regex} uses @samp{\(} and @samp{\)}, the -@code{:} expression returns the part of @var{string} that matched the -subexpression; otherwise, it returns the number of characters matched. - -If the match fails, the @code{:} operator returns the null string if -@samp{\(} and @samp{\)} are used in @var{regex}, otherwise 0. +If @var{regex} does not use @samp{\(} and @samp{\)}, the @code{:} +expression returns the number of characters matched, or 0 if the match +fails. + +If @var{regex} uses @samp{\(} and @samp{\)}, the @code{:} expression +returns the part of @var{string} that matched the subexpression, or +the null string if the match failed or the subexpression did not +contribute to the match. @kindex \( @r{regexp operator} Only the first @samp{\( @dots{} \)} pair is relevant to the return diff --git a/src/expr.c b/src/expr.c index afd8b9ca56..4893d948f3 100644 --- a/src/expr.c +++ b/src/expr.c @@ -614,8 +614,13 @@ docolon (VALUE *sv, VALUE *pv) /* Were \(...\) used? */ if (re_buffer.re_nsub > 0) { - sv->u.s[re_regs.end[1]] = '\0'; - v = str_value (sv->u.s + re_regs.start[1]); + if (re_regs.end[1] < 0) + v = str_value (""); + else + { + sv->u.s[re_regs.end[1]] = '\0'; + v = str_value (sv->u.s + re_regs.start[1]); + } } else { diff --git a/tests/misc/expr.pl b/tests/misc/expr.pl index 31f903aff1..0e547af9fd 100755 --- a/tests/misc/expr.pl +++ b/tests/misc/expr.pl @@ -84,6 +84,9 @@ my @Tests = # In 5.94 and earlier, anchors incorrectly matched newlines. ['anchor', "'a\nb' : 'a\$'", {OUT => '0'}, {EXIT => 1}], + # In 8.32, \( ... \) that did not match caused memory errors. + ['emptysub', '"a" : "\\(b\\)*"', {OUT => ''}, {EXIT => 1}], + # These tests are taken from grep/tests/bre.tests. ['bre1', '"abc" : "a\\(b\\)c"', {OUT => 'b'}], ['bre2', '"a(" : "a("', {OUT => '2'}],