From: Richard Levitte Date: Tue, 25 May 2021 08:29:24 +0000 (+0200) Subject: util/fix-doc-nits: Fix link detection in collectnames() to be kinder X-Git-Tag: openssl-3.0.0-beta1~369 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0e7e3b9b9d2d0a49097b4e224098036d3e6b8087;p=thirdparty%2Fopenssl.git util/fix-doc-nits: Fix link detection in collectnames() to be kinder The way the links were parsed out of the contents caused a regexp recursion. The easiest way to deal with it is to find all markup using $markup_re, and then parse out the L<> markups and add them to the links array. Fixes #15449 Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/15450) --- diff --git a/util/find-doc-nits b/util/find-doc-nits index c62307a9ce2..7498ac6865a 100755 --- a/util/find-doc-nits +++ b/util/find-doc-nits @@ -1000,16 +1000,27 @@ sub collectnames { } } - my @links = - $podinfo{contents} =~ /L< - # if the link is of the form L<something|name(s)>, - # then remove 'something'. Note that 'something' - # may contain POD codes as well... - (?:(?:[^\|]|<[^>]*>)*\|)? - # we're only interested in references that have - # a one digit section number - ([^\/>\(]+\(\d\)) - /gx; + my @links = (); + # Don't use this regexp directly on $podinfo{contents}, as it causes + # a regexp recursion, which fails on really big PODs. Instead, use + # $markup_re to pick up general markup, and use this regexp to check + # that the markup that was found is indeed a link. + my $linkre = qr/L< + # if the link is of the form L<something|name(s)>, + # then remove 'something'. Note that 'something' + # may contain POD codes as well... + (?:(?:[^\|]|<[^>]*>)*\|)? + # we're only interested in references that have + # a one digit section number + ([^\/>\(]+\(\d\)) + /x; + while ( $podinfo{contents} =~ /$markup_re/msg ) { + my $x = $1; + + if ($x =~ $linkre) { + push @links, $1; + } + } $link_map{$filename} = [ @links ]; }