* lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Rewrite without regex.
(_AS_QUOTE_IFELSE): Likewise.
* lib/m4sugar/m4sugar.m4 (m4_strip): Reduce from 3 to 2 regex.
(m4_bpatsubsts): Split...
(_m4_bpatsubsts): ...so that recursion can avoid patsubst on empty
regex.
(_m4_divert()): Define, to avoid m4 warning on `m4_divert'.
(m4_qlen): Optimize on short strings, to avoid regex.
(m4_sign): Avoid regex, and fix bug with `01' and `-0'.
* lib/autoconf/general.m4 (AC_CACHE_VAL): Rewrite without regex.
(AC_DEFINE_TRACE): Likewise.
Signed-off-by: Eric Blake <ebb9@byu.net>
+2007-09-29 Eric Blake <ebb9@byu.net>
+
+ Speed optimization: avoid m4 regex when other algorithms work.
+ * lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Rewrite without regex.
+ (_AS_QUOTE_IFELSE): Likewise.
+ * lib/m4sugar/m4sugar.m4 (m4_strip): Reduce from 3 to 2 regex.
+ (m4_bpatsubsts): Split...
+ (_m4_bpatsubsts): ...so that recursion can avoid patsubst on empty
+ regex.
+ (_m4_divert()): Define, to avoid m4 warning on `m4_divert'.
+ (m4_qlen): Optimize on short strings, to avoid regex.
+ (m4_sign): Avoid regex, and fix bug with `01' and `-0'.
+ * lib/autoconf/general.m4 (AC_CACHE_VAL): Rewrite without regex.
+ (AC_DEFINE_TRACE): Likewise.
+
2007-09-28 Eric Blake <ebb9@byu.net>
Oops - my earlier 'optimization' caused a regression.
# The name of shell var CACHE-ID must contain `_cv_' in order to get saved.
# Should be dnl'ed. Try to catch common mistakes.
m4_defun([AC_CACHE_VAL],
-[AS_LITERAL_IF([$1], [m4_bmatch(m4_quote($1), [_cv_], [],
- [AC_DIAGNOSE([syntax],
+[AS_LITERAL_IF([$1], [m4_if(m4_index(m4_quote($1), [_cv_]), [-1],
+ [AC_DIAGNOSE([syntax],
[$0($1, ...): suspicious cache-id, must contain _cv_ to be cached])])])dnl
-m4_bmatch([$2], [AC_DEFINE],
- [AC_DIAGNOSE([syntax],
+m4_if(m4_index([$2], [AC_DEFINE]), [-1], [],
+ [AC_DIAGNOSE([syntax],
[$0($1, ...): suspicious presence of an AC_DEFINE in the second argument, ]dnl
-[where no actions should be taken])],
- [AC_SUBST], [AC_DIAGNOSE([syntax],
+[where no actions should be taken])])dnl
+m4_if(m4_index([$2], [AC_SUBST]), [-1], [],
+ [AC_DIAGNOSE([syntax],
[$0($1, ...): suspicious presence of an AC_SUBST in the second argument, ]dnl
[where no actions should be taken])])dnl
AS_VAR_SET_IF([$1],
# ---------------------------
# This macro is a wrapper around AC_DEFINE_TRACE_LITERAL which filters
# out non literal symbols.
+#
+# m4_index is roughly 5 to 8 times faster than m4_bpatsubst.
m4_define([AC_DEFINE_TRACE],
-[AS_LITERAL_IF([$1], [AC_DEFINE_TRACE_LITERAL(m4_bpatsubst([[$1]], [(.*)]))])])
+[AS_LITERAL_IF([$1], [AC_DEFINE_TRACE_LITERAL(
+ m4_if(m4_index([[$1]], [(]), [-1], [[$1]],
+ [m4_substr([[$1]], [0], m4_index([[$1]], [(]))]))])])
# AC_DEFINE(VARIABLE, [VALUE], [DESCRIPTION])
# If STRING contains `\\' or `\$', it's modern.
# If STRING contains `\"' or `\`', it's old.
# Otherwise it's modern.
-# We use two quotes in the pattern to keep highlighting tools at peace.
+#
+# Profiling shows that m4_index is 5 to 8x faster than m4_bregexp. The
+# slower implementation used:
+# m4_bmatch([$1],
+# [\\[\\$]], [$2],
+# [\\[`"]], [$3],
+# [$2])
+# The current implementation caters to the common case of no backslashes,
+# to minimize m4_index expansions (hence the nested if).
m4_define([_AS_QUOTE_IFELSE],
-[m4_bmatch([$1],
- [\\[\\$]], [$2],
- [\\[`""]], [$3],
- [$2])])
+[m4_if(m4_index([$1], [\]), [-1], [$2],
+ [m4_if(m4_eval(m4_index([$1], [\\]) >= 0), [1], [$2],
+ m4_eval(m4_index([$1], [\$]) >= 0), [1], [$2],
+ m4_eval(m4_index([$1], [\`]) >= 0), [1], [$3],
+ m4_eval(m4_index([$1], [\"]) >= 0), [1], [$3],
+ [$2])])])
# _AS_QUOTE(STRING, [CHARS = `"])
# IF-INDIR, else IF-NOT-INDIR.
# This is an *approximation*: for instance EXPRESSION = `\$' is
# definitely a literal, but will not be recognized as such.
+#
+# We used to use m4_bmatch(m4_quote($1), [[`$]], [$3], [$2]), but
+# profiling shows that it is faster to use m4_translit.
m4_define([AS_LITERAL_IF],
-[m4_bmatch(m4_quote($1), [[`$]],
- [$3], [$2])])
+[m4_if(m4_len(m4_quote($1)),
+ m4_len(m4_translit(m4_dquote(m4_quote($1)), [`$])),
+ [$2], [$3])])
# AS_TMPDIR(PREFIX, [DIRECTORY = $TMPDIR [= /tmp]])
# I would have liked to name this macro `m4_bpatsubst', unfortunately,
# due to quotation problems, I need to double quote $1 below, therefore
# the anchors are broken :( I can't let users be trapped by that.
+#
+# Recall that m4_shiftn always results in an argument. Hence, we need
+# to distinguish between a final deletion vs. ending recursion.
m4_define([m4_bpatsubsts],
[m4_if([$#], 0, [m4_fatal([$0: too few arguments: $#])],
[$#], 1, [m4_fatal([$0: too few arguments: $#: $1])],
[$#], 2, [m4_builtin([patsubst], $@)],
+ [_$0($@m4_if(m4_eval($# & 1), 0, [,]))])])
+m4_define([_m4_bpatsubsts],
+[m4_if([$#], 2, [$1],
[$0(m4_builtin([patsubst], [[$1]], [$2], [$3]),
m4_shiftn(3, $@))])])
# KILL is only used to suppress output.
m4_define([_m4_divert(KILL)], -1)
+# The empty diversion name is a synonym for 0.
+m4_define([_m4_divert()], 0)
+
# _m4_divert_n_stack
# ------------------
#
# Because we want to preserve active symbols, STRING must be double-quoted.
#
-# Then notice the 2 last patterns: they are in charge of removing the
+# First, notice that we guarantee a trailing space. Why? Because regexes
+# are greedy, and `.* ?' always groups the space into the .* portion.
+# The algorithm is simpler by avoiding `?' at the end. The algorithm
+# correctly strips everything if STRING is just ` '.
+#
+# Then notice the second pattern: it is in charge of removing the
# leading/trailing spaces. Why not just `[^ ]'? Because they are
-# applied to doubly quoted strings, i.e. more or less [[STRING]]. So
-# if there is a leading space in STRING, then it is the *third*
-# character, since there are two leading `['; equally for the last pattern.
+# applied to over-quoted strings, i.e. more or less [STRING], due
+# to the limitations of m4_bpatsubsts. So the leading space in STRING
+# is the *second* character; equally for the trailing space.
m4_define([m4_strip],
-[m4_bpatsubsts([[$1]],
+[m4_bpatsubsts([$1 ],
[[ ]+], [ ],
- [^\(..\) ], [\1],
- [ \(..\)$], [\1])])
+ [^. ?\(.*\) .$], [[[\1]]])])
# m4_normalize(STRING)
# m4_qlen(STRING)
# ---------------
# Expands to the length of STRING after autom4te converts all quadrigraphs.
+#
+# Avoid bpatsubsts for the common case of no quadrigraphs.
m4_define([m4_qlen],
-[m4_len(m4_bpatsubsts([[$1]], [@\(<:\|:>\|S|\|%:\)@], [P], [@&t@]))])
+[m4_if(m4_index([$1], [@]), [-1], [m4_len([$1])],
+ [m4_len(m4_bpatsubsts([[$1]], [@\(<:\|:>\|S|\|%:\)@], [P], [@&t@]))])])
# m4_qdelta(STRING)
# ----------
#
# The sign of the integer A.
+#
+# Rather than resort to eval or regex, we merely delete [0\t ], collapse
+# all other digits to 1, then use the first two characters to decide.
m4_define([m4_sign],
-[m4_bmatch([$1],
- [^-], -1,
- [^0+], 0,
- 1)])
+[m4_case(m4_substr(m4_translit([[$1]], [2-90 ], [11111111]), 0, 2),
+ [-1], [-1],
+ [-], [0],
+ [], [0],
+ [1])])
# m4_cmp(A, B)
# ------------