From: Eric Blake Date: Wed, 3 Oct 2007 21:26:51 +0000 (-0600) Subject: Another round of regex avoidance. X-Git-Tag: v2.62~235 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=987a33376699a92cb00962aedd4632d05bc7cef6;p=thirdparty%2Fautoconf.git Another round of regex avoidance. * lib/m4sugar/m4sugar.m4 (m4_cr_alnum, m4_cr_all) (_m4_define_cr_not, m4_cr_not_letters, m4_cr_not_LETTERS) (m4_cr_not_Letters, m4_cr_not_digits, m4_cr_not_alnum) (m4_cr_not_symbols1, m4_cr_not_symbols2): New macros, implementing character ranges useful in m4_translit. (m4_toupper, m4_tolower): Optimize the constant portion of definition. * lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Also reject @S|@ because it creates $, and reject [] thanks to AS_TR_SH rewrite. (AS_TR_SH, AS_TR_CPP): Use just translit, not bpatsubst. (AS_ESCAPE): Factor... (_AS_ESCAPE): ...into new macro, with second argument required. Avoid regex in common case. (_AS_QUOTE): Use new macro. Signed-off-by: Eric Blake --- diff --git a/ChangeLog b/ChangeLog index 931edc91..b15f8cce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,21 @@ 2007-10-03 Eric Blake + Another round of regex avoidance. + * lib/m4sugar/m4sugar.m4 (m4_cr_alnum, m4_cr_all) + (_m4_define_cr_not, m4_cr_not_letters, m4_cr_not_LETTERS) + (m4_cr_not_Letters, m4_cr_not_digits, m4_cr_not_alnum) + (m4_cr_not_symbols1, m4_cr_not_symbols2): New macros, implementing + character ranges useful in m4_translit. + (m4_toupper, m4_tolower): Optimize the constant portion of + definition. + * lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Also reject @S|@ because it + creates $, and reject [] thanks to AS_TR_SH rewrite. + (AS_TR_SH, AS_TR_CPP): Use just translit, not bpatsubst. + (AS_ESCAPE): Factor... + (_AS_ESCAPE): ...into new macro, with second argument required. + Avoid regex in common case. + (_AS_QUOTE): Use new macro. + Whitespace cleanup. * lib/autoconf/types.m4: Avoid space-tab. * lib/m4sugar/m4sh.m4: Use tab consistently. diff --git a/lib/m4sugar/m4sh.m4 b/lib/m4sugar/m4sh.m4 index a29f9b93..b8d5c548 100644 --- a/lib/m4sugar/m4sh.m4 +++ b/lib/m4sugar/m4sh.m4 @@ -560,10 +560,15 @@ $as_unset $1 || test "${$1+set}" != set || { $1=$2; export $1; }]) # AS_ESCAPE(STRING, [CHARS = $"`\]) # --------------------------------- # Escape the CHARS in STRING. +# +# Avoid the m4_bpatsubst if there are no interesting characters to escape. +# _AS_ESCAPE bypasses argument defaulting. m4_define([AS_ESCAPE], -[m4_bpatsubst([$1], - m4_dquote(m4_default([$2], [\"$`])), - [\\\&])]) +[_$0([$1], m4_default([$2], [\"$`]))]) +m4_define([_AS_ESCAPE], +[m4_if(m4_len([$1]), + m4_len(m4_translit([[$1]], [$2])), + [$1], [m4_bpatsubst([$1], [[$2]], [\\\&])])]) # _AS_QUOTE_IFELSE(STRING, IF-MODERN-QUOTATION, IF-OLD-QUOTATION) @@ -597,7 +602,7 @@ m4_define([_AS_QUOTE_IFELSE], # backslash all the quotes. m4_define([_AS_QUOTE], [_AS_QUOTE_IFELSE([$1], - [AS_ESCAPE([$1], m4_default([$2], [`""]))], + [_AS_ESCAPE([$1], m4_default([$2], [`""]))], [m4_warn([obsolete], [back quotes and double quotes must not be escaped in: $1])dnl $1])]) @@ -1228,12 +1233,31 @@ m4_popdef([AS_Prefix])dnl # This is an *approximation*: for instance EXPRESSION = `\$' is # definitely a literal, but will not be recognized as such. # +# Why do we reject EXPRESSION expanding with `[' or `]' as a literal? +# Because AS_TR_SH is MUCH faster if it can use m4_translit on literals +# instead of m4_bpatsubst; but m4_translit is much tougher to do safely +# if `[' is translated. +# +# Note that the quadrigraph @S|@ can result in non-literals, but outright +# rejecting all @ would make AC_INIT complain on its bug report address. +# # We used to use m4_bmatch(m4_quote($1), [[`$]], [$3], [$2]), but # profiling shows that it is faster to use m4_translit. +# +# Because the translit is stripping quotes, it must also neutralize anything +# that might be in a macro name, as well as comments and commas. All the +# problem characters are unified so that a single m4_index can scan the +# result. +# +# Rather than expand m4_defn every time AS_LITERAL_IF is expanded, we +# inline its expansion up front. m4_define([AS_LITERAL_IF], -[m4_if(m4_len(m4_quote($1)), - m4_len(m4_translit(m4_dquote(m4_quote($1)), [`$])), - [$2], [$3])]) +[m4_if(m4_eval(m4_index(m4_quote($1), [@S|@]) == -1), [0], [$3], + m4_index(m4_translit(m4_quote($1), + [[]`,#]]m4_dquote(m4_defn([m4_cr_symbols2]))[, + [$$$]), + [$]), [-1], [$2], + [$3])]) # AS_TMPDIR(PREFIX, [DIRECTORY = $TMPDIR [= /tmp]]) @@ -1413,11 +1437,22 @@ as_tr_sh="eval sed 'y%*+%pp%;s%[[^_$as_cr_alnum]]%_%g'" # Transform EXPRESSION into a valid shell variable name. # sh/m4 polymorphic. # Be sure to update the definition of `$as_tr_sh' if you change this. +# +# AS_LITERAL_IF guarantees that a literal does not have any nested quotes, +# once $1 is expanded. m4_translit silently uses only the first occurrence +# of a character that appears multiple times in argument 2, since we know +# that m4_cr_not_symbols2 also contains [ and ]. m4_translit also silently +# ignores characters in argument 3 that do not match argument 2; we use this +# fact to skip worrying about the length of m4_cr_not_symbols2. +# +# For speed, we inline the literal definitions that can be computed up front. m4_defun([AS_TR_SH], [AS_REQUIRE([_$0_PREPARE])dnl AS_LITERAL_IF([$1], - [m4_bpatsubst(m4_translit([[$1]], [*+], [pp]), - [[^a-zA-Z0-9_]], [_])], + [m4_translit([$1], [*+[]]]]dnl +m4_dquote(m4_dquote(m4_defn([m4_cr_not_symbols2])))[[, + [pp[]]]]dnl +m4_dquote(m4_dquote(m4_for(,1,255,,[[_]])))[[)], [`AS_ECHO(["$1"]) | $as_tr_sh`])]) @@ -1435,13 +1470,15 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[[^_$as_cr_alnum]]%_%g # Map EXPRESSION to an upper case string which is valid as rhs for a # `#define'. sh/m4 polymorphic. Be sure to update the definition # of `$as_tr_cpp' if you change this. +# +# See implementation comments in AS_TR_SH. m4_defun([AS_TR_CPP], [AS_REQUIRE([_$0_PREPARE])dnl AS_LITERAL_IF([$1], - [m4_bpatsubst(m4_translit([[$1]], - [*abcdefghijklmnopqrstuvwxyz], - [PABCDEFGHIJKLMNOPQRSTUVWXYZ]), - [[^A-Z0-9_]], [_])], + [m4_translit([$1], [*[]]]]dnl +m4_dquote(m4_dquote(m4_defn([m4_cr_letters])m4_defn([m4_cr_not_symbols2])))[[, + [P[]]]]dnl +m4_dquote(m4_dquote(m4_defn([m4_cr_LETTERS])m4_for(,1,255,,[[_]])))[[)], [`AS_ECHO(["$1"]) | $as_tr_cpp`])]) diff --git a/lib/m4sugar/m4sugar.m4 b/lib/m4sugar/m4sugar.m4 index c835fed9..5317617c 100644 --- a/lib/m4sugar/m4sugar.m4 +++ b/lib/m4sugar/m4sugar.m4 @@ -1367,6 +1367,14 @@ m4_defn([m4_cr_LETTERS])dnl m4_define([m4_cr_digits], [0123456789]) +# m4_cr_alnum +# ----------- +m4_define([m4_cr_alnum], +m4_defn([m4_cr_Letters])dnl +m4_defn([m4_cr_digits])dnl +) + + # m4_cr_symbols1 # m4_cr_symbols2 # ------------------------------- @@ -1379,6 +1387,52 @@ m4_defn([m4_cr_symbols1])dnl m4_defn([m4_cr_digits])dnl ) +# m4_cr_all +# --------- +# The character range representing everything, with `-' as the last +# character, since it is special to m4_translit. Use with care, because +# it contains characters special to M4 (fortunately, both ASCII and EBCDIC +# have [] in order, so m4_defn([m4_cr_all]) remains a valid string). It +# also contains characters special to terminals, so it should never be +# displayed in an error message. Also, attempts to map [ and ] to other +# characters via m4_translit must deal with the fact that m4_translit does +# not add quotes to the output. +# +# It is mainly useful in generating inverted character range maps, for use +# in places where m4_translit is faster than an equivalent m4_bpatsubst; +# the regex `[^a-z]' is equivalent to: +# m4_translit(m4_dquote(m4_defn([m4_cr_all])), [a-z]) +m4_define([m4_cr_all], +m4_translit(m4_dquote(m4_format(m4_dquote(m4_for( + ,1,255,,[[%c]]))m4_for([i],1,255,,[,i]))), [-])-) + + +# _m4_define_cr_not(CATEGORY) +# --------------------------- +# Define m4_cr_not_CATEGORY as the inverse of m4_cr_CATEGORY. +m4_define([_m4_define_cr_not], +[m4_define([m4_cr_not_$1], + m4_translit(m4_dquote(m4_defn([m4_cr_all])), + m4_defn([m4_cr_$1])))]) + + +# m4_cr_not_letters +# m4_cr_not_LETTERS +# m4_cr_not_Letters +# m4_cr_not_digits +# m4_cr_not_alnum +# m4_cr_not_symbols1 +# m4_cr_not_symbols2 +# ------------------ +# Inverse character sets +_m4_define_cr_not([letters]) +_m4_define_cr_not([LETTERS]) +_m4_define_cr_not([Letters]) +_m4_define_cr_not([digits]) +_m4_define_cr_not([alnum]) +_m4_define_cr_not([symbols1]) +_m4_define_cr_not([symbols2]) + # m4_re_escape(STRING) # -------------------- @@ -1411,10 +1465,14 @@ m4_defn([m4_re_string])dnl # m4_toupper(STRING) # ------------------ # These macros convert STRING to lowercase or uppercase. +# +# Rather than expand the m4_defn each time, we inline them up front. m4_define([m4_tolower], -[m4_translit([$1], m4_defn([m4_cr_LETTERS]), m4_defn([m4_cr_letters]))]) +[m4_translit([$1], ]m4_dquote(m4_defn([m4_cr_LETTERS]))[, + ]m4_dquote(m4_defn([m4_cr_letters]))[)]) m4_define([m4_toupper], -[m4_translit([$1], m4_defn([m4_cr_letters]), m4_defn([m4_cr_LETTERS]))]) +[m4_translit([$1], ]m4_dquote(m4_defn([m4_cr_letters]))[, + ]m4_dquote(m4_defn([m4_cr_LETTERS]))[)]) # m4_split(STRING, [REGEXP])