in text processing utilities.
$(SCRIPT inhibitQuickIndex = 1;)
+$(DIVC quickindex,
$(BOOKTABLE,
$(TR $(TH Category) $(TH Functions))
$(TR $(TD Matching) $(TD
$(TR $(TD Building) $(TD
$(LREF ctRegex)
$(LREF escaper)
- $(LREF _regex)
+ $(LREF regex)
))
$(TR $(TD Replace) $(TD
$(LREF replace)
$(LREF Splitter)
$(LREF StaticRegex)
))
-)
+))
$(SECTION Synopsis)
---
Checks of this sort are better addressed by additional post-processing.
The basic syntax shouldn't surprise experienced users of regular expressions.
- For an introduction to $(D std.regex) see a
+ For an introduction to `std.regex` see a
$(HTTP dlang.org/regular-expression.html, short tour) of the module API
and its abilities.
$(REG_ROW \W, Matches any non-word character.)
$(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
$(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
- $(REG_ROW \\, Matches \ character. )
+ $(REG_ROW \\\\, Matches \ character. )
$(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
$(REG_ROW \p{PropertyName}, Matches a character that belongs
to the Unicode PropertyName set.
$(REG_START Character classes )
$(REG_TABLE
$(REG_TITLE Pattern element, Semantics )
- $(REG_ROW Any atom, Has the same meaning as outside of a character class.)
+ $(REG_ROW Any atom, Has the same meaning as outside of a character class,
+ except for ] which must be written as \\])
$(REG_ROW a-z, Includes characters a, b, c, ..., z. )
$(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
Where a, b are arbitrary classes, means union, set difference,
A set of functions in this module that do the substitution rely
on a simple format to guide the process. In particular the table below
- applies to the $(D format) argument of
+ applies to the `format` argument of
$(LREF replaceFirst) and $(LREF replaceAll).
The format string can reference parts of match using the following notation.
$(REG_TABLE
$(REG_TITLE Format specifier, Replaced by )
- $(REG_ROW $$(AMP), the whole match. )
+ $(REG_ROW $(DOLLAR)$(AMP), the whole match. )
$(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
$(REG_ROW $', part of input $(I following) the match. )
$(REG_ROW $$, '$' character. )
$(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
- $(REG_ROW \\, '\' character. )
+ $(REG_ROW \\\\, '\\' character. )
$(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
)
$(SECTION Slicing and zero memory allocations orientation)
All matches returned by pattern matching functionality in this library
- are slices of the original input. The notable exception is the $(D replace)
+ are slices of the original input. The notable exception is the `replace`
family of functions that generate a new string from the input.
In cases where producing the replacement is the ultimate goal
Authors: Dmitry Olshansky,
- API and utility constructs are modeled after the original $(D std.regex)
+ API and utility constructs are modeled after the original `std.regex`
by Walter Bright and Andrei Alexandrescu.
- Source: $(PHOBOSSRC std/_regex/_package.d)
+ Source: $(PHOBOSSRC std/regex/package.d)
Macros:
REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
import std.range.primitives, std.traits;
import std.regex.internal.ir;
-import std.regex.internal.thompson; //TODO: get rid of this dependency
-import std.typecons; // : Flag, Yes, No;
+import std.typecons : Flag, Yes, No;
/++
- $(D Regex) object holds regular expression pattern in compiled form.
+ A `Regex` object holds a regular expression pattern in compiled form.
- Instances of this object are constructed via calls to $(D regex).
+ Instances of this object are constructed via calls to `regex`.
This is an intended form for caching and storage of frequently
used regular expressions.
public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
/++
- A $(D StaticRegex) is $(D Regex) object that contains D code specially
+ A `StaticRegex` is a `Regex` object that contains D code specially
generated at compile-time to speed up matching.
- Implicitly convertible to normal $(D Regex),
- however doing so will result in losing this additional capability.
+ No longer used, kept as alias to Regex for backwards compatibility.
+/
-public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
+public alias StaticRegex = Regex;
/++
Compile regular expression pattern for the later execution.
- Returns: $(D Regex) object that works on inputs having
- the same character width as $(D pattern).
+ Returns: `Regex` object that works on inputs having
+ the same character width as `pattern`.
Params:
pattern = A single regular expression to match.
patterns = An array of regular expression strings.
The resulting `Regex` object will match any expression;
use $(LREF whichPattern) to know which.
- flags = The _attributes (g, i, m and x accepted)
+ flags = The _attributes (g, i, m, s and x accepted)
- Throws: $(D RegexException) if there were any errors during compilation.
+ Throws: `RegexException` if there were any errors during compilation.
+/
-@trusted public auto regex(S)(S[] patterns, const(char)[] flags="")
+@trusted public auto regex(S : C[], C)(const S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
import std.array : appender;
import std.functional : memoize;
enum cacheSize = 8; //TODO: invent nice interface to control regex caching
- S pat;
+ const(C)[] pat;
if (patterns.length > 1)
{
auto app = appender!S();
///
@system unittest
{
- // multi-pattern regex example
- auto multi = regex([`([a-z]+):(\d+)`, `(\d+),\d+`]); // multi regex
- auto m = "abc:43 12,34".matchAll(multi);
- assert(m.front.whichPattern == 1);
- assert(m.front[1] == "abc");
- assert(m.front[2] == "43");
- m.popFront();
- assert(m.front.whichPattern == 2);
- assert(m.front[1] == "12");
+ void test(S)()
+ {
+ // multi-pattern regex example
+ S[] arr = [`([a-z]+):(\d+)`, `(\d+),\d+`];
+ auto multi = regex(arr); // multi regex
+ S str = "abc:43 12,34";
+ auto m = str.matchAll(multi);
+ assert(m.front.whichPattern == 1);
+ assert(m.front[1] == "abc");
+ assert(m.front[2] == "43");
+ m.popFront();
+ assert(m.front.whichPattern == 2);
+ assert(m.front[1] == "12");
+ }
+
+ import std.meta : AliasSeq;
+ static foreach (C; AliasSeq!(string, wstring, dstring))
+ // Test with const array of patterns - see https://issues.dlang.org/show_bug.cgi?id=20301
+ static foreach (S; AliasSeq!(C, const C, immutable C))
+ test!S();
}
-public auto regexImpl(S)(S pattern, const(char)[] flags="")
-if (isSomeString!(S))
+@system unittest
+{
+ import std.conv : to;
+ import std.string : indexOf;
+
+ immutable pattern = "s+";
+ auto regexString = to!string(regex(pattern, "U"));
+ assert(regexString.length <= pattern.length + 100, "String representation shouldn't be unreasonably bloated.");
+ assert(indexOf(regexString, "s+") >= 0, "String representation should include pattern.");
+ assert(indexOf(regexString, 'U') >= 0, "String representation should include flags.");
+}
+
+public auto regexImpl(S)(const S pattern, const(char)[] flags="")
+if (isSomeString!(typeof(pattern)))
{
import std.regex.internal.parser : Parser, CodeGen;
auto parser = Parser!(Unqual!(typeof(pattern)), CodeGen)(pattern, flags);
}
+private struct CTRegexWrapper(Char)
+{
+ private immutable(Regex!Char)* re;
+
+ // allow code that expects mutable Regex to still work
+ // we stay "logically const"
+ @property @trusted ref getRe() const { return *cast(Regex!Char*) re; }
+ alias getRe this;
+}
+
template ctRegexImpl(alias pattern, string flags=[])
{
import std.regex.internal.backtracking, std.regex.internal.parser;
- enum r = regex(pattern, flags);
+ static immutable r = cast(immutable) regex(pattern, flags);
alias Char = BasicElementOf!(typeof(pattern));
enum source = ctGenRegExCode(r);
- alias Matcher = BacktrackingMatcher!(true);
- @trusted bool func(ref Matcher!Char matcher)
+ @trusted pure bool func(BacktrackingMatcher!Char matcher)
{
debug(std_regex_ctr) pragma(msg, source);
+ cast(void) matcher;
mixin(source);
}
- enum nr = StaticRegex!Char(r, &func);
+ static immutable staticRe =
+ cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
+ enum wrapper = CTRegexWrapper!Char(&staticRe);
+}
+
+@safe pure unittest
+{
+ // test compat for logical const workaround
+ static void test(StaticRegex!char)
+ {
+ }
+ enum re = ctRegex!``;
+ test(re);
+}
+
+@safe pure unittest
+{
+ auto re = ctRegex!`foo`;
+ assert(matchFirst("foo", re));
+
+ // test reassignment
+ re = ctRegex!`bar`;
+ assert(matchFirst("bar", re));
+ assert(!matchFirst("bar", ctRegex!`foo`));
}
/++
Params:
pattern = Regular expression
- flags = The _attributes (g, i, m and x accepted)
+ flags = The _attributes (g, i, m, s and x accepted)
+/
-public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
+public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).wrapper;
-enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
- || is(RegEx == StaticRegex!(BasicElementOf!R));
+enum isRegexFor(RegEx, R) = is(immutable RegEx == immutable Regex!(BasicElementOf!R))
+ || is(RegEx : const(Regex!(BasicElementOf!R)))
+ || is(immutable RegEx == immutable StaticRegex!(BasicElementOf!R));
/++
- $(D Captures) object contains submatches captured during a call
- to $(D match) or iteration over $(D RegexMatch) range.
+ `Captures` object contains submatches captured during a call
+ to `match` or iteration over `RegexMatch` range.
First element of range is the whole match.
+/
-@trusted public struct Captures(R, DIndex = size_t)
+@trusted public struct Captures(R)
if (isSomeString!R)
{//@trusted because of union inside
- alias DataIndex = DIndex;
+ alias DataIndex = size_t;
alias String = R;
+ alias Store = SmallFixedArray!(Group!DataIndex, 3);
private:
import std.conv : text;
+ Store matches;
+ const(NamedGroup)[] _names;
R _input;
int _nMatch;
- enum smallString = 3;
- enum SMALL_MASK = 0x8000_0000, REF_MASK= 0x1FFF_FFFF;
- union
- {
- Group!DataIndex[] big_matches;
- Group!DataIndex[smallString] small_matches;
- }
uint _f, _b;
- uint _refcount; // ref count or SMALL MASK + num groups
- NamedGroup[] _names;
- this()(R input, uint n, NamedGroup[] named)
+ this(R input, uint n, const(NamedGroup)[] named)
{
_input = input;
_names = named;
- newMatches(n);
+ matches = Store(n);
_b = n;
_f = 0;
}
- this(alias Engine)(ref RegexMatch!(R,Engine) rmatch)
+ this(ref RegexMatch!R rmatch)
{
_input = rmatch._input;
- _names = rmatch._engine.re.dict;
- immutable n = rmatch._engine.re.ngroup;
- newMatches(n);
+ _names = rmatch._engine.pattern.dict;
+ immutable n = rmatch._engine.pattern.ngroup;
+ matches = Store(n);
_b = n;
_f = 0;
}
- @property inout(Group!DataIndex[]) matches() inout
+ inout(R) getMatch(size_t index) inout
{
- return (_refcount & SMALL_MASK) ? small_matches[0 .. _refcount & 0xFF] : big_matches;
- }
-
- void newMatches(uint n)
- {
- import core.stdc.stdlib : calloc;
- import std.exception : enforce;
- if (n > smallString)
- {
- auto p = cast(Group!DataIndex*) enforce(
- calloc(Group!DataIndex.sizeof,n),
- "Failed to allocate Captures struct"
- );
- big_matches = p[0 .. n];
- _refcount = 1;
- }
- else
- {
- _refcount = SMALL_MASK | n;
- }
- }
-
- bool unique()
- {
- return (_refcount & SMALL_MASK) || _refcount == 1;
+ auto m = &matches[index];
+ return *m ? _input[m.begin .. m.end] : null;
}
public:
- this(this)
- {
- if (!(_refcount & SMALL_MASK))
- {
- _refcount++;
- }
- }
- ~this()
- {
- import core.stdc.stdlib : free;
- if (!(_refcount & SMALL_MASK))
- {
- if (--_refcount == 0)
- {
- free(big_matches.ptr);
- big_matches = null;
- }
- }
- }
///Slice of input prior to the match.
@property R pre()
{
@property R front()
{
assert(_nMatch, "attempted to get front of an empty match");
- return _input[matches[_f].begin .. matches[_f].end];
+ return getMatch(_f);
}
///ditto
@property R back()
{
assert(_nMatch, "attempted to get back of an empty match");
- return _input[matches[_b - 1].begin .. matches[_b - 1].end];
+ return getMatch(_b - 1);
}
///ditto
inout(R) opIndex()(size_t i) inout
{
assert(_f + i < _b,text("requested submatch number ", i," is out of range"));
- assert(matches[_f + i].begin <= matches[_f + i].end,
- text("wrong match: ", matches[_f + i].begin, "..", matches[_f + i].end));
- return _input[matches[_f + i].begin .. matches[_f + i].end];
+ return getMatch(_f + i);
}
/++
if (isSomeString!String)
{
size_t index = lookupNamedGroup(_names, i);
- return _input[matches[index].begin .. matches[index].end];
+ return getMatch(index);
}
///Number of matches in this object.
assert(c.empty);
assert(!matchFirst("nothing", "something"));
+
+ // Captures that are not matched will be null.
+ c = matchFirst("ac", regex(`a(b)?c`));
+ assert(c);
+ assert(!c[1]);
+}
+
+@system unittest
+{
+ Captures!string c;
+ string s = "abc";
+ assert(cast(bool)(c = matchFirst(s, regex("d")))
+ || cast(bool)(c = matchFirst(s, regex("a"))));
+}
+
+// https://issues.dlang.org/show_bug.cgi?id=19979
+@system unittest
+{
+ auto c = matchFirst("bad", regex(`(^)(not )?bad($)`));
+ assert(c[0] && c[0].length == "bad".length);
+ assert(c[1] && !c[1].length);
+ assert(!c[2]);
+ assert(c[3] && !c[3].length);
}
/++
- A regex engine state, as returned by $(D match) family of functions.
+ A regex engine state, as returned by `match` family of functions.
Effectively it's a forward range of Captures!R, produced
by lazily searching for matches in a given input.
-
- $(D alias Engine) specifies an engine type to use during matching,
- and is automatically deduced in a call to $(D match)/$(D bmatch).
+/
-@trusted public struct RegexMatch(R, alias Engine = ThompsonMatcher)
+@trusted public struct RegexMatch(R)
if (isSomeString!R)
{
+ import std.typecons : Rebindable;
private:
- import core.stdc.stdlib : malloc, free;
alias Char = BasicElementOf!R;
- alias EngineType = Engine!Char;
- EngineType _engine;
+ Matcher!Char _engine;
+ Rebindable!(const MatcherFactory!Char) _factory;
R _input;
- Captures!(R,EngineType.DataIndex) _captures;
- void[] _memory;//is ref-counted
+ Captures!R _captures;
this(RegEx)(R input, RegEx prog)
{
import std.exception : enforce;
_input = input;
- immutable size = EngineType.initialMemory(prog)+size_t.sizeof;
- _memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
- scope(failure) free(_memory.ptr);
- *cast(size_t*)_memory.ptr = 1;
- _engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]);
- static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
- _engine.nativeFn = prog.nativeFn;
- _captures = Captures!(R,EngineType.DataIndex)(this);
- _captures._nMatch = _engine.match(_captures.matches);
- debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
+ if (prog.factory is null) _factory = defaultFactory!Char(prog);
+ else _factory = prog.factory;
+ _engine = _factory.create(prog, input);
+ assert(_engine.refCount == 1);
+ _captures = Captures!R(this);
+ _captures.matches.mutate((slice) pure { _captures._nMatch = _engine.match(slice); });
}
- @property ref size_t counter(){ return *cast(size_t*)_memory.ptr; }
public:
this(this)
{
- if (_memory.ptr)
- {
- ++counter;
- debug(std_regex_allocation) writefln("RefCount (postblit): %x %d",
- _memory.ptr, *cast(size_t*)_memory.ptr);
- }
+ if (_engine) _factory.incRef(_engine);
}
~this()
{
- if (_memory.ptr && --*cast(size_t*)_memory.ptr == 0)
- {
- debug(std_regex_allocation) writefln("RefCount (dtor): %x %d",
- _memory.ptr, *cast(size_t*)_memory.ptr);
- free(cast(void*)_memory.ptr);
- }
+ if (_engine) _factory.decRef(_engine);
}
///Shorthands for front.pre, front.post, front.hit.
assert(m.empty);
---
+/
- @property auto front()
+ @property inout(Captures!R) front() inout
{
return _captures;
}
void popFront()
{
import std.exception : enforce;
- if (counter != 1)
- {//do cow magic first
- counter--;//we abandon this reference
- immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof;
- _memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
- _engine = _engine.dupTo(_memory[size_t.sizeof .. size]);
- counter = 1;//points to new chunk
- }
-
- if (!_captures.unique)
+ // CoW - if refCount is not 1, we are aliased by somebody else
+ if (_engine.refCount != 1)
{
- // has external references - allocate new space
- _captures.newMatches(_engine.re.ngroup);
+ // we create a new engine & abandon this reference
+ auto old = _engine;
+ _engine = _factory.dup(old, _input);
+ _factory.decRef(old);
}
- _captures._nMatch = _engine.match(_captures.matches);
+ _captures.matches.mutate((slice) { _captures._nMatch = _engine.match(slice); });
}
///ditto
T opCast(T:bool)(){ return !empty; }
/// Same as .front, provided for compatibility with original std.regex.
- @property auto captures() inout { return _captures; }
-
+ @property inout(Captures!R) captures() inout { return _captures; }
}
-private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
+private auto matchOnceImpl(RegEx, R)(R input, const auto ref RegEx prog) @trusted
{
- import core.stdc.stdlib : malloc, free;
- import std.exception : enforce;
alias Char = BasicElementOf!R;
- alias EngineType = Engine!Char;
-
- size_t size = EngineType.initialMemory(re);
- void[] memory = enforce(malloc(size), "malloc failed")[0 .. size];
- scope(exit) free(memory.ptr);
- auto captures = Captures!(R, EngineType.DataIndex)(input, re.ngroup, re.dict);
- auto engine = EngineType(re, Input!Char(input), memory);
- static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
- engine.nativeFn = re.nativeFn;
- captures._nMatch = engine.match(captures.matches);
+ static struct Key
+ {
+ immutable(Char)[] pattern;
+ uint flags;
+ }
+ static Key cacheKey = Key("", -1);
+ static Matcher!Char cache;
+ auto factory = prog.factory is null ? defaultFactory!Char(prog) : prog.factory;
+ auto key = Key(prog.pattern, prog.flags);
+ Matcher!Char engine;
+ if (cacheKey == key)
+ {
+ engine = cache;
+ engine.rearm(input);
+ }
+ else
+ {
+ engine = factory.create(prog, input);
+ if (cache) factory.decRef(cache); // destroy cached engine *after* building a new one
+ cache = engine;
+ cacheKey = key;
+ }
+ auto captures = Captures!R(input, prog.ngroup, prog.dict);
+ captures.matches.mutate((slice) pure { captures._nMatch = engine.match(slice); });
return captures;
}
-private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re)
+// matchOnce is constructed as a safe, pure wrapper over matchOnceImpl. It can be
+// faked as pure because the static mutable variables are used to cache the key and
+// character matcher. The technique used avoids delegates and GC.
+private @safe auto matchOnce(RegEx, R)(R input, const auto ref RegEx prog) pure
+{
+ static auto impl(R input, const ref RegEx prog)
+ {
+ return matchOnceImpl(input, prog);
+ }
+
+ static @trusted auto pureImpl(R input, const ref RegEx prog)
+ {
+ auto p = assumePureFunction(&impl);
+ return p(input, prog);
+ }
+
+ return pureImpl(input, prog);
+}
+
+private auto matchMany(RegEx, R)(R input, auto ref RegEx re) @safe
{
- re.flags |= RegexOption.global;
- return RegexMatch!(R, Engine)(input, re);
+ return RegexMatch!R(input, re.withFlags(re.flags | RegexOption.global));
}
@system unittest
{
//sanity checks for new API
auto re = regex("abc");
- assert(!"abc".matchOnce!(ThompsonMatcher)(re).empty);
- assert("abc".matchOnce!(ThompsonMatcher)(re)[0] == "abc");
+ assert(!"abc".matchOnce(re).empty);
+ assert("abc".matchOnce(re)[0] == "abc");
}
+// https://issues.dlang.org/show_bug.cgi?id=18135
+@system unittest
+{
+ static struct MapResult { RegexMatch!string m; }
+ MapResult m;
+ m = MapResult();
+ assert(m == m);
+}
private enum isReplaceFunctor(alias fun, R) =
__traits(compiles, (Captures!R c) { fun(c); });
/++
- Start matching $(D input) to regex pattern $(D re),
+ Start matching `input` to regex pattern `re`,
using Thompson NFA matching scheme.
The use of this function is $(RED discouraged) - use either of
matching scheme to use depends highly on the pattern kind and
can be done automatically on a case-by-case basis.
- Returns: a $(D RegexMatch) object holding engine state after first match.
+ Returns: a `RegexMatch` object holding engine state after first match.
+/
public auto match(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+if (isSomeString!R && isRegexFor!(RegEx,R))
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, re);
+ return RegexMatch!(Unqual!(typeof(input)))(input, re);
}
///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, regex(re));
-}
-
-public auto match(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
-{
- import std.regex.internal.backtracking : BacktrackingMatcher;
- return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
+ return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
}
/++
- Find the first (leftmost) slice of the $(D input) that
- matches the pattern $(D re). This function picks the most suitable
+ Find the first (leftmost) slice of the `input` that
+ matches the pattern `re`. This function picks the most suitable
regular expression engine depending on the pattern properties.
- $(D re) parameter can be one of three types:
+ `re` parameter can be one of three types:
$(UL
$(LI Plain string(s), in which case it's compiled to bytecode before matching. )
$(LI Regex!char (wchar/dchar) that contains a pattern in the form of
if there was a match, otherwise an empty $(LREF Captures) object.
+/
public auto matchFirst(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+if (isSomeString!R && isRegexFor!(RegEx, R))
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return matchOnce!ThompsonMatcher(input, re);
+ return matchOnce(input, re);
}
///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return matchOnce!ThompsonMatcher(input, regex(re));
+ return matchOnce(input, regex(re));
}
///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return matchOnce!ThompsonMatcher(input, regex(re));
-}
-
-public auto matchFirst(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
-{
- import std.regex.internal.backtracking : BacktrackingMatcher;
- return matchOnce!(BacktrackingMatcher!true)(input, re);
+ return matchOnce(input, regex(re));
}
/++
- Initiate a search for all non-overlapping matches to the pattern $(D re)
- in the given $(D input). The result is a lazy range of matches generated
+ Initiate a search for all non-overlapping matches to the pattern `re`
+ in the given `input`. The result is a lazy range of matches generated
as they are encountered in the input going left to right.
This function picks the most suitable regular expression engine
depending on the pattern properties.
- $(D re) parameter can be one of three types:
+ `re` parameter can be one of three types:
$(UL
$(LI Plain string(s), in which case it's compiled to bytecode before matching. )
$(LI Regex!char (wchar/dchar) that contains a pattern in the form of
after the first match was found or an empty one if not present.
+/
public auto matchAll(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+if (isSomeString!R && isRegexFor!(RegEx, R))
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return matchMany!ThompsonMatcher(input, re);
+ return matchMany(input, re);
}
///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return matchMany!ThompsonMatcher(input, regex(re));
+ return matchMany(input, regex(re));
}
///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
- import std.regex.internal.thompson : ThompsonMatcher;
- return matchMany!ThompsonMatcher(input, regex(re));
-}
-
-public auto matchAll(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
-{
- import std.regex.internal.backtracking : BacktrackingMatcher;
- return matchMany!(BacktrackingMatcher!true)(input, re);
+ return matchMany(input, regex(re));
}
// another set of tests just to cover the new API
import std.algorithm.iteration : map;
import std.conv : to;
- foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
- {
+ static foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
+ {{
auto str1 = "blah-bleh".to!String();
auto pat1 = "bl[ae]h".to!String();
auto mf = matchFirst(str1, pat1);
assert(cmAll.front.equal(cmf));
cmAll.popFront();
assert(cmAll.front.equal(["6/1", "6", "1"].map!(to!String)()));
- }
+ }}
}
/++
- Start matching of $(D input) to regex pattern $(D re),
+ Start matching of `input` to regex pattern `re`,
using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
backtracking) matching scheme.
matching scheme to use depends highly on the pattern kind and
can be done automatically on a case-by-case basis.
- Returns: a $(D RegexMatch) object holding engine
+ Returns: a `RegexMatch` object holding engine
state after first match.
+/
public auto bmatch(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+if (isSomeString!R && isRegexFor!(RegEx, R))
{
- import std.regex.internal.backtracking : BacktrackingMatcher;
- return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, re);
+ return RegexMatch!(Unqual!(typeof(input)))(input, re);
}
///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
- import std.regex.internal.backtracking : BacktrackingMatcher;
- return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, regex(re));
-}
-
-public auto bmatch(R, RegEx)(R input, RegEx re)
-if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
-{
- import std.regex.internal.backtracking : BacktrackingMatcher;
- return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
+ return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
}
// produces replacement string from format using captures for substitution
}
/++
- Construct a new string from $(D input) by replacing the first match with
- a string generated from it according to the $(D format) specifier.
+ Construct a new string from `input` by replacing the first match with
+ a string generated from it according to the `format` specifier.
To replace all matches use $(LREF replaceAll).
/++
This is a general replacement tool that constructs a new string by replacing
- matches of pattern $(D re) in the $(D input). Unlike the other overload
+ matches of pattern `re` in the `input`. Unlike the other overload
there is no format string; instead captures are passed
- to a user-defined functor $(D fun) that returns a new string
+ to a user-defined functor `fun` that returns a new string
to use as replacement.
- This version replaces the first match in $(D input),
+ This version replaces the first match in `input`,
see $(LREF replaceAll) to replace all of the matches.
Returns:
- A new string of the same type as $(D input) with all matches
- replaced by return values of $(D fun). If no matches found
- returns the $(D input) itself.
+ A new string of the same type as `input` with all matches
+ replaced by return values of `fun`. If no matches found
+ returns the `input` itself.
+/
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
/++
A variation on $(LREF replaceFirst) that instead of allocating a new string
- on each call outputs the result piece-wise to the $(D sink). In particular
+ on each call outputs the result piece-wise to the `sink`. In particular
this enables efficient construction of a final output incrementally.
Like in $(LREF replaceFirst) family of functions there is an overload
- for the substitution guided by the $(D format) string
+ for the substitution guided by the `format` string
and the one with the user defined callback.
+/
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
}
/++
- Construct a new string from $(D input) by replacing all of the
- fragments that match a pattern $(D re) with a string generated
- from the match according to the $(D format) specifier.
+ Construct a new string from `input` by replacing all of the
+ fragments that match a pattern `re` with a string generated
+ from the match according to the `format` specifier.
To replace only the first match use $(LREF replaceFirst).
see $(S_LINK Replace _format string, the _format string).
Returns:
- A string of the same type as $(D input) with the all
+ A string of the same type as `input` with all
of the matches (if any) replaced.
If no match is found returns the input string itself.
+/
/++
This is a general replacement tool that constructs a new string by replacing
- matches of pattern $(D re) in the $(D input). Unlike the other overload
+ matches of pattern `re` in the `input`. Unlike the other overload
there is no format string; instead captures are passed
- to a user-defined functor $(D fun) that returns a new string
+ to a user-defined functor `fun` that returns a new string
to use as replacement.
- This version replaces all of the matches found in $(D input),
+ This version replaces all of the matches found in `input`,
see $(LREF replaceFirst) to replace the first match only.
Returns:
- A new string of the same type as $(D input) with all matches
- replaced by return values of $(D fun). If no matches found
- returns the $(D input) itself.
+ A new string of the same type as `input` with all matches
+ replaced by return values of `fun`. If no matches found
+ returns the `input` itself.
Params:
input = string to search
/++
A variation on $(LREF replaceAll) that instead of allocating a new string
- on each call outputs the result piece-wise to the $(D sink). In particular
+ on each call outputs the result piece-wise to the `sink`. In particular
this enables efficient construction of a final output incrementally.
As with $(LREF replaceAll) there are 2 overloads - one with a format string,
import std.array : appender;
import std.conv;
// try and check first/all simple substitution
- foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
- {
+ static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
+ {{
S s1 = "curt trial".to!S();
S s2 = "round dome".to!S();
S t1F = "court trial".to!S();
assert(sink.data == t1F~t2F~t1A);
replaceAllInto(sink, s2, re2, "ho");
assert(sink.data == t1F~t2F~t1A~t2A);
- }
+ }}
}
/++
- Old API for replacement, operation depends on flags of pattern $(D re).
+ Old API for replacement, operation depends on flags of pattern `re`.
With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
works the same as $(LREF replaceFirst).
@trusted this(Range input, RegEx separator)
{//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
_input = input;
- separator.flags |= RegexOption.global;
+ const re = separator.withFlags(separator.flags | RegexOption.global);
if (_input.empty)
{
//there is nothing to match at all, make _offset > 0
}
else
{
- _match = Rx(_input, separator);
+ _match = Rx(_input, re);
static if (keepSeparators)
if (_match.pre.empty)
.equal([",", "1", ",", "2", ",", "3"]));
}
-///An eager version of $(D splitter) that creates an array with splitted slices of $(D input).
+///An eager version of `splitter` that creates an array with split slices of `input`.
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String && isRegexFor!(RegEx, String))
{
{
import std.algorithm.comparison;
import std.conv;
- foreach (S; AliasSeq!(string, wstring, dstring))
- {
+ static foreach (S; AliasSeq!(string, wstring, dstring))
+ {{
auto s = "^".to!S;
assert(s.escaper.equal(`\^`));
auto s2 = "";
assert(s2.escaper.equal(""));
- }
+ }}
}