]> git.ipfire.org Git - thirdparty/gcc.git/blob - libphobos/src/std/string.d
Add D front-end, libphobos library, and D2 testsuite.
[thirdparty/gcc.git] / libphobos / src / std / string.d
1 // Written in the D programming language.
2
3 /**
4 String handling functions.
5
6 $(SCRIPT inhibitQuickIndex = 1;)
7
8 $(DIVC quickindex,
9 $(BOOKTABLE ,
10 $(TR $(TH Category) $(TH Functions) )
11 $(TR $(TDNW Searching)
12 $(TD
13 $(MYREF column)
14 $(MYREF indexOf)
15 $(MYREF indexOfAny)
16 $(MYREF indexOfNeither)
17 $(MYREF lastIndexOf)
18 $(MYREF lastIndexOfAny)
19 $(MYREF lastIndexOfNeither)
20 )
21 )
22 $(TR $(TDNW Comparison)
23 $(TD
24 $(MYREF isNumeric)
25 )
26 )
27 $(TR $(TDNW Mutation)
28 $(TD
29 $(MYREF capitalize)
30 )
31 )
32 $(TR $(TDNW Pruning and Filling)
33 $(TD
34 $(MYREF center)
35 $(MYREF chomp)
36 $(MYREF chompPrefix)
37 $(MYREF chop)
38 $(MYREF detabber)
39 $(MYREF detab)
40 $(MYREF entab)
41 $(MYREF entabber)
42 $(MYREF leftJustify)
43 $(MYREF outdent)
44 $(MYREF rightJustify)
45 $(MYREF strip)
46 $(MYREF stripLeft)
47 $(MYREF stripRight)
48 $(MYREF wrap)
49 )
50 )
51 $(TR $(TDNW Substitution)
52 $(TD
53 $(MYREF abbrev)
54 $(MYREF soundex)
55 $(MYREF soundexer)
56 $(MYREF succ)
57 $(MYREF tr)
58 $(MYREF translate)
59 )
60 )
61 $(TR $(TDNW Miscellaneous)
62 $(TD
63 $(MYREF assumeUTF)
64 $(MYREF fromStringz)
65 $(MYREF lineSplitter)
66 $(MYREF representation)
67 $(MYREF splitLines)
68 $(MYREF toStringz)
69 )
70 )))
71
72 Objects of types $(D _string), $(D wstring), and $(D dstring) are value types
73 and cannot be mutated element-by-element. To mutate elements while building
74 a string, use $(D char[]), $(D wchar[]), or $(D dchar[]). The $(D xxxstring)
75 types are preferable because they don't exhibit undesired aliasing, thus
76 making code more robust.
77
78 The following functions are publicly imported:
79
80 $(BOOKTABLE ,
81 $(TR $(TH Module) $(TH Functions) )
82 $(LEADINGROW Publicly imported functions)
83 $(TR $(TD std.algorithm)
84 $(TD
85 $(REF_SHORT cmp, std,algorithm,comparison)
86 $(REF_SHORT count, std,algorithm,searching)
87 $(REF_SHORT endsWith, std,algorithm,searching)
88 $(REF_SHORT startsWith, std,algorithm,searching)
89 ))
90 $(TR $(TD std.array)
91 $(TD
92 $(REF_SHORT join, std,array)
93 $(REF_SHORT replace, std,array)
94 $(REF_SHORT replaceInPlace, std,array)
95 $(REF_SHORT split, std,array)
96 $(REF_SHORT empty, std,array)
97 ))
98 $(TR $(TD std.format)
99 $(TD
100 $(REF_SHORT format, std,format)
101 $(REF_SHORT sformat, std,format)
102 ))
103 $(TR $(TD std.uni)
104 $(TD
105 $(REF_SHORT icmp, std,uni)
106 $(REF_SHORT toLower, std,uni)
107 $(REF_SHORT toLowerInPlace, std,uni)
108 $(REF_SHORT toUpper, std,uni)
109 $(REF_SHORT toUpperInPlace, std,uni)
110 ))
111 )
112
113 There is a rich set of functions for _string handling defined in other modules.
114 Functions related to Unicode and ASCII are found in $(MREF std, uni)
115 and $(MREF std, ascii), respectively. Other functions that have a
116 wider generality than just strings can be found in $(MREF std, algorithm)
117 and $(MREF std, range).
118
119 See_Also:
120 $(LIST
121 $(MREF std, algorithm) and
122 $(MREF std, range)
123 for generic range algorithms
124 ,
125 $(MREF std, ascii)
126 for functions that work with ASCII strings
127 ,
128 $(MREF std, uni)
129 for functions that work with Unicode strings
130 )
131
132 Copyright: Copyright Digital Mars 2007-.
133
134 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
135
136 Authors: $(HTTP digitalmars.com, Walter Bright),
137 $(HTTP erdani.org, Andrei Alexandrescu),
138 Jonathan M Davis,
139 and David L. 'SpottedTiger' Davis
140
141 Source: $(PHOBOSSRC std/_string.d)
142
143 */
144 module std.string;
145
version (unittest)
{
private:
    // Test-only wrapper around a string. It converts to `string` only through
    // `alias this`, and copying is disabled, so it exercises the overloads
    // that accept string-convertible types.
    struct TestAliasedString
    {
        string _s;
        @disable this(this);
        string get() @safe @nogc pure nothrow { return _s; }
        alias get this;
    }

    // Calls `func` once with `s` wrapped in TestAliasedString and once with
    // the plain string (forwarding `args` both times) and reports whether the
    // two results agree.
    bool testAliasedString(alias func, Args...)(string s, Args args)
    {
        import std.algorithm.comparison : equal;

        auto viaWrapper = func(TestAliasedString(s), args);
        auto viaString = func(s, args);

        static if (!is(typeof(equal(viaWrapper, viaString))))
        {
            // Not comparable as ranges: fall back to plain equality.
            return viaWrapper == viaString;
        }
        else
        {
            // Ranges are compared element-wise rather than by identity.
            return equal(viaWrapper, viaString);
        }
    }
}
173
174 public import std.format : format, sformat;
175 import std.typecons : Flag, Yes, No;
176 public import std.uni : icmp, toLower, toLowerInPlace, toUpper, toUpperInPlace;
177
178 import std.meta; // AliasSeq, staticIndexOf
179 import std.range.primitives; // back, ElementEncodingType, ElementType, front,
180 // hasLength, hasSlicing, isBidirectionalRange, isForwardRange, isInfinite,
181 // isInputRange, isOutputRange, isRandomAccessRange, popBack, popFront, put,
182 // save;
183 import std.traits; // isConvertibleToString, isNarrowString, isSomeChar,
184 // isSomeString, StringTypeOf, Unqual
185
186 //public imports for backward compatibility
187 public import std.algorithm.comparison : cmp;
188 public import std.algorithm.searching : startsWith, endsWith, count;
189 public import std.array : join, replace, replaceInPlace, split, empty;
190
191 /* ************* Exceptions *************** */
192
/++
    The exception type used by std.string functions to report errors.
 +/
class StringException : Exception
{
    static import std.exception;

    /// Constructors generated by $(REF basicExceptionCtors, std,exception).
    mixin std.exception.basicExceptionCtors;
}
203
204
/++
    Wraps a null-terminated C string in a D slice without copying it.

    Params:
        cString = A null-terminated c-style string.

    Returns: A D-style array of $(D char) that refers to the same memory as
    $(D cString) and carries the same type qualifiers. The terminating
    $(D '\0') is not part of the slice. A null pointer gives a null slice.

    $(RED Important Note:) The result is a slice of the original buffer; the
    data is neither modified nor copied.
+/
inout(char)[] fromStringz(inout(char)* cString) @nogc @system pure nothrow
{
    import core.stdc.string : strlen;

    if (!cString)
        return null;

    // The slice length is the distance to the first '\0'.
    immutable length = strlen(cString);
    return cString[0 .. length];
}
220
221 ///
222 @system pure unittest
223 {
// A null pointer maps to a null slice; a literal maps to a slice with the
// same contents.
224 assert(fromStringz(null) == null);
225 assert(fromStringz("foo") == "foo");
226 }
227
/++
    Produces a null-terminated copy of a D string for use with C APIs.

    Params:
        s = A D-style string.

    Returns: A C-style null-terminated string equivalent to $(D s). $(D s)
    must not contain embedded $(D '\0')'s as any C function will treat the
    first $(D '\0') that it sees as the end of the string. If $(D s.empty) is
    $(D true), then a string containing only $(D '\0') is returned.

    $(RED Important Note:) When passing a $(D char*) to a C function, and the C
    function keeps it around for any reason, make sure that you keep a
    reference to it in your D code. Otherwise, it may become invalid during a
    garbage collection cycle and cause a nasty bug when the C code tries to use
    it.
+/
immutable(char)* toStringz(const(char)[] s) @trusted pure nothrow
out (result)
{
    import core.stdc.string : strlen;
    if (result)
    {
        // Trailing '\0's of s are ignored when checking the result: the C
        // length must equal the length of s without them.
        size_t expected = s.length;
        while (expected > 0 && s[expected - 1] == 0)
            --expected;
        assert(strlen(result) == expected);
        assert(result[0 .. expected] == s[0 .. expected]);
    }
}
body
{
    import std.exception : assumeUnique;

    // Peeking one element past the end of s to see whether a terminator is
    // already there is not reliable for arbitrary slices, so this overload
    // always copies into a buffer one element longer than s.
    auto buffer = new char[s.length + 1];
    buffer[0 .. $ - 1] = s[];
    buffer[$ - 1] = 0;

    return &assumeUnique(buffer)[0];
}
280
/++ Ditto +/
immutable(char)* toStringz(in string s) @trusted pure nothrow
{
    // An empty string maps to the (terminated) empty literal.
    if (s.empty)
        return "".ptr;

    /* The compiler places a 0 after static strings and the storage allocator
     * places one after newly allocated char[]'s, so the element just past the
     * end of s may already be a terminator, in which case no copy is needed.
     */
    auto pastEnd = s.ptr + s.length;

    // Only peek when the address is not a multiple of 4. A multiple of 4 is
    // conservatively assumed to possibly start a new block of memory, which
    // might be unreadable; any other address is taken to be valid memory.
    immutable bool safeToPeek = (cast(size_t) pastEnd & 3) != 0;
    if (safeToPeek && *pastEnd == 0)
        return &s[0];

    // Otherwise fall back to the copying overload.
    return toStringz(cast(const char[]) s);
}
300
301 ///
302 pure nothrow @system unittest
303 {
304 import core.stdc.string : strlen;
305 import std.conv : to;
306
// Literals and sub-slices both yield C strings of the expected length.
307 auto p = toStringz("foo");
308 assert(strlen(p) == 3);
309 const(char)[] foo = "abbzxyzzy";
310 p = toStringz(foo[3 .. 5]);
311 assert(strlen(p) == 2);
312
// An empty string still yields a pointer to a terminator.
313 string test = "";
314 p = toStringz(test);
315 assert(*p == 0);
316
// Strings that consist of, or end in, '\0'.
317 test = "\0";
318 p = toStringz(test);
319 assert(*p == 0);
320
321 test = "foo\0";
322 p = toStringz(test);
323 assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0);
324
// A const-qualified empty string is accepted as well.
325 const string test2 = "";
326 p = toStringz(test2);
327 assert(*p == 0);
328 }
329
330
/**
 * Flag type that selects between case-sensitive ($(D Yes.caseSensitive)) and
 * case-insensitive ($(D No.caseSensitive)) searching.
 */
alias CaseSensitive = Flag!("caseSensitive");
335
/++
    Finds the first occurrence of a character in a string or character range.

    Params:
        s = string or InputRange of characters to search in correct UTF format
        c = character to search for
        startIdx = starting index to a well-formed code point
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        The index, counted in code units of $(D s), of the first occurrence
        of $(D c) in $(D s) at or after $(D startIdx), or $(D -1) if $(D c)
        does not occur there. A found index is never smaller than
        $(D startIdx).
        If the parameters are not valid UTF, the result will still
        be in the range [-1 .. s.length], but will not be reliable otherwise.

    Throws:
        If the sequence starting at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    See_Also: $(REF countUntil, std,algorithm,searching)
+/
ptrdiff_t indexOf(Range)(Range s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    static import std.ascii;
    static import std.uni;
    import std.utf : byDchar, byCodeUnit, codeLength;

    alias Char = Unqual!(ElementEncodingType!Range);

    if (cs == No.caseSensitive)
    {
        ptrdiff_t offset;
        if (std.ascii.isASCII(c))
        {
            // ASCII needle: compare lower-cased code units one by one.
            immutable asciiTarget = cast(char) std.ascii.toLower(c);
            foreach (const unit; s.byCodeUnit())
            {
                if (asciiTarget == std.ascii.toLower(unit))
                    return offset;
                ++offset;
            }
        }
        else
        {
            // Non-ASCII needle: decode and compare lower-cased code points,
            // advancing the offset by each code point's encoded length.
            immutable uniTarget = std.uni.toLower(c);
            foreach (const point; s.byDchar())
            {
                if (uniTarget == std.uni.toLower(point))
                    return offset;
                offset += codeLength!Char(point);
            }
        }
        return -1;
    }

    // Case-sensitive search from here on.
    static if (Char.sizeof == 1 && isSomeString!Range)
    {
        if (std.ascii.isASCII(c) && !__ctfe)
        {
            // ASCII needle in a narrow string at run time: use C's memchr.
            static ptrdiff_t trustedmemchr(Range s, char c) @trusted
            {
                import core.stdc.string : memchr;
                const hit = cast(const(Char)*) memchr(s.ptr, c, s.length);
                return hit ? hit - s.ptr : -1;
            }

            return trustedmemchr(s, cast(char) c);
        }
    }

    ptrdiff_t pos;
    static if (Char.sizeof == 1)
    {
        if (c <= 0x7F)
        {
            // Single-unit needle: scan code units directly.
            foreach (const unit; s)
            {
                if (c == unit)
                    return pos;
                ++pos;
            }
        }
        else
        {
            // Multi-unit needle: decode and count encoded lengths.
            foreach (const point; s.byDchar())
            {
                if (c == point)
                    return pos;
                pos += codeLength!Char(point);
            }
        }
    }
    else static if (Char.sizeof == 2)
    {
        if (c <= 0xFFFF)
        {
            foreach (const unit; s)
            {
                if (c == unit)
                    return pos;
                ++pos;
            }
        }
        else if (c <= 0x10FFFF)
        {
            // The needle needs a UTF-16 surrogate pair; look for both halves.
            immutable wchar high = cast(wchar)((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
            immutable wchar low = cast(wchar)(((c - 0x10000) & 0x3FF) + 0xDC00);
            for (auto units = s.byCodeUnit(); !units.empty; units.popFront())
            {
                if (high == units.front)
                {
                    units.popFront();
                    if (units.empty)    // invalid UTF - missing second of pair
                        break;
                    if (low == units.front)
                        return pos;
                    ++pos;
                }
                ++pos;
            }
        }
    }
    else static if (Char.sizeof == 4)
    {
        foreach (const unit; s)
        {
            if (c == unit)
                return pos;
            ++pos;
        }
    }
    else
        static assert(0);

    return -1;
}
486
/// Ditto
ptrdiff_t indexOf(Range)(Range s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    static if (isSomeString!(typeof(s)) ||
                (hasSlicing!(typeof(s)) && hasLength!(typeof(s))))
    {
        // Sliceable input: search the tail and shift the hit back.
        if (startIdx >= s.length)
            return -1;

        immutable ptrdiff_t relative = indexOf(s[startIdx .. $], c, cs);
        return relative == -1 ? -1 : relative + cast(ptrdiff_t) startIdx;
    }
    else
    {
        // No slicing available: drop the first startIdx elements by hand.
        for (size_t skipped = 0; skipped < startIdx; ++skipped)
        {
            if (s.empty)
                return -1;
            s.popFront();
        }

        immutable ptrdiff_t relative = indexOf(s, c, cs);
        return relative == -1 ? -1 : relative + cast(ptrdiff_t) startIdx;
    }
}
521
522 ///
523 @safe pure unittest
524 {
525 import std.typecons : No;
526
// Found, not found, and a case-insensitive match.
527 string s = "Hello World";
528 assert(indexOf(s, 'W') == 6);
529 assert(indexOf(s, 'Z') == -1);
530 assert(indexOf(s, 'w', No.caseSensitive) == 6);
531 }
532
533 ///
534 @safe pure unittest
535 {
536 import std.typecons : No;
537
// Same searches with a start index; a start index past the end gives -1.
538 string s = "Hello World";
539 assert(indexOf(s, 'W', 4) == 6);
540 assert(indexOf(s, 'Z', 100) == -1);
541 assert(indexOf(s, 'w', 3, No.caseSensitive) == 6);
542 }
543
// Overload for types that are only convertible to a string: forwards to the
// instantiation for the underlying string type.
ptrdiff_t indexOf(Range)(auto ref Range s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return indexOf!Str(s, c, cs);
}
550
// Start-index overload for types that are only convertible to a string:
// forwards to the instantiation for the underlying string type.
ptrdiff_t indexOf(Range)(auto ref Range s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return indexOf!Str(s, c, startIdx, cs);
}
557
558 @safe pure unittest
559 {
// The alias-this wrapper must give the same result as a plain string.
560 assert(testAliasedString!indexOf("std/string.d", '/'));
561 }
562
563 @safe pure unittest
564 {
565 import std.conv : to;
566 import std.exception : assertCTFEable;
567 import std.traits : EnumMembers;
568 import std.utf : byChar, byWchar, byDchar;
569
// The whole body is run through assertCTFEable.
570 assertCTFEable!(
571 {
// Basic searches for every string width, case-sensitive then insensitive.
572 foreach (S; AliasSeq!(string, wstring, dstring))
573 {
574 assert(indexOf(cast(S) null, cast(dchar)'a') == -1);
575 assert(indexOf(to!S("def"), cast(dchar)'a') == -1);
576 assert(indexOf(to!S("abba"), cast(dchar)'a') == 0);
577 assert(indexOf(to!S("def"), cast(dchar)'f') == 2);
578
579 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
580 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
581 assert(indexOf(to!S("Abba"), cast(dchar)'a', No.caseSensitive) == 0);
582 assert(indexOf(to!S("def"), cast(dchar)'F', No.caseSensitive) == 2);
583 assert(indexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);
584
585 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
586 assert(indexOf("def", cast(char)'f', No.caseSensitive) == 2);
587 assert(indexOf(sPlts, cast(char)'P', No.caseSensitive) == 23);
588 assert(indexOf(sPlts, cast(char)'R', No.caseSensitive) == 2);
589 }
590
// Expected indices are code-unit offsets, so they differ between the
// UTF-8, UTF-16 and UTF-32 forms of the same text.
591 foreach (cs; EnumMembers!CaseSensitive)
592 {
593 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9);
594 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7);
595 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6);
596
597 assert(indexOf("hello\U00010143\u0100\U00010143".byChar, '\u0100', cs) == 9);
598 assert(indexOf("hello\U00010143\u0100\U00010143".byWchar, '\u0100', cs) == 7);
599 assert(indexOf("hello\U00010143\u0100\U00010143".byDchar, '\u0100', cs) == 6);
600
601 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, 'l', cs) == 2);
602 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, '\u0100', cs) == 7);
603 assert(indexOf("hello\U0000EFFF\u0100\U00010143".byChar, '\u0100', cs) == 8);
604
// Needles that need a UTF-16 surrogate pair.
605 assert(indexOf("hello\U00010100".byWchar, '\U00010100', cs) == 5);
606 assert(indexOf("hello\U00010100".byWchar, '\U00010101', cs) == -1);
607 }
608
// Static arrays are accepted too.
609 char[10] fixedSizeArray = "0123456789";
610 assert(indexOf(fixedSizeArray, '2') == 2);
611 });
612 }
613
614 @safe pure unittest
615 {
// Start-index overload: wrapper and plain string must agree.
616 assert(testAliasedString!indexOf("std/string.d", '/', 3));
617 }
618
619 @safe pure unittest
620 {
621 import std.conv : to;
622 import std.traits : EnumMembers;
623 import std.utf : byCodeUnit, byChar, byWchar;
624
// Start index applied to byCodeUnit / byWchar ranges, including a start
// index beyond the end of the range.
625 assert("hello".byCodeUnit.indexOf(cast(dchar)'l', 1) == 2);
626 assert("hello".byWchar.indexOf(cast(dchar)'l', 1) == 2);
627 assert("hello".byWchar.indexOf(cast(dchar)'l', 6) == -1);
628
629 foreach (S; AliasSeq!(string, wstring, dstring))
630 {
631 assert(indexOf(cast(S) null, cast(dchar)'a', 1) == -1);
632 assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1);
633 assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3);
634 assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2);
635
636 assert((to!S("def")).indexOf(cast(dchar)'a', 1,
637 No.caseSensitive) == -1);
638 assert(indexOf(to!S("def"), cast(dchar)'a', 1,
639 No.caseSensitive) == -1);
640 assert(indexOf(to!S("def"), cast(dchar)'a', 12,
641 No.caseSensitive) == -1);
642 assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2,
643 No.caseSensitive) == 3);
644 assert(indexOf(to!S("def"), cast(dchar)'F', 2, No.caseSensitive) == 2);
645
// The start index is also passed as uint and ulong here.
646 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
647 assert(indexOf("def", cast(char)'f', cast(uint) 2,
648 No.caseSensitive) == 2);
649 assert(indexOf(sPlts, cast(char)'P', 12, No.caseSensitive) == 23);
650 assert(indexOf(sPlts, cast(char)'R', cast(ulong) 1,
651 No.caseSensitive) == 2);
652 }
653
// The returned index is relative to the start of the whole string.
654 foreach (cs; EnumMembers!CaseSensitive)
655 {
656 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs)
657 == 9);
658 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs)
659 == 7);
660 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs)
661 == 6);
662 }
663 }
664
/++
    Searches for substring in $(D s).

    Params:
        s = string or ForwardRange of characters to search in correct UTF format
        sub = substring to search for
        startIdx = the index into s to start searching from
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        the index of the first occurrence of $(D sub) in $(D s) with
        respect to the start index $(D startIdx), counted in code units of
        $(D s). If $(D sub) is not found, then $(D -1) is returned.
        If the arguments are not valid UTF, the result will still
        be in the range [-1 .. s.length], but will not be reliable otherwise.
        If $(D sub) is found the value of the returned index is at least
        $(D startIdx).

    Throws:
        If the sequence starting at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    Bugs:
        Does not work with case insensitive strings where the mapping of
        tolower and toupper is not 1:1.
+/
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub,
        in CaseSensitive cs = Yes.caseSensitive)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    alias Char1 = Unqual!(ElementEncodingType!Range);

    static if (isSomeString!Range)
    {
        import std.algorithm.searching : find;

        // `find` returns the tail of s starting at the match (empty if there
        // is none); the pointer difference is the code-unit index.
        const(Char1)[] balance;
        if (cs == Yes.caseSensitive)
        {
            balance = find(s, sub);
        }
        else
        {
            balance = find!
                ((a, b) => toLower(a) == toLower(b))
                (s, sub);
        }
        return () @trusted { return balance.empty ? -1 : balance.ptr - s.ptr; } ();
    }
    else
    {
        if (s.empty)
            return -1;
        if (sub.empty)
            return 0;                   // degenerate case

        import std.utf : byDchar, codeLength;
        auto subr = sub.byDchar;        // decode sub[] by dchar's
        dchar sub0 = subr.front;        // cache first character of sub[]
        subr.popFront();

        // Special case for single character search
        if (subr.empty)
            return indexOf(s, sub0, cs);

        if (cs == No.caseSensitive)
            sub0 = toLower(sub0);

        /* Classic double nested loop search algorithm
         */
        ptrdiff_t index = 0;            // count code unit index into s
        for (auto sbydchar = s.byDchar(); !sbydchar.empty; sbydchar.popFront())
        {
            // Keep the character exactly as it occurs in s: the index must
            // advance by *its* encoded length. Lower-casing can change the
            // encoded length (e.g. U+212A KELVIN SIGN -> 'k').
            immutable dchar decoded = sbydchar.front;
            immutable dchar c2 = cs == No.caseSensitive ? toLower(decoded) : decoded;
            if (c2 == sub0)
            {
                auto s2 = sbydchar.save;        // why s must be a forward range
                foreach (c; subr.save)
                {
                    s2.popFront();
                    if (s2.empty)
                        return -1;              // s ends before sub does
                    if (cs == Yes.caseSensitive ? c != s2.front
                                                : toLower(c) != toLower(s2.front)
                       )
                        goto Lnext;
                }
                return index;
            }
        Lnext:
            index += codeLength!Char1(decoded);
        }
        return -1;
    }
}
763
/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // A start index at or beyond the end leaves nothing to search.
    if (startIdx >= s.length)
        return -1;

    // Search the tail, then translate the hit back into an index into s.
    immutable ptrdiff_t relative = indexOf(s[startIdx .. $], sub, cs);
    return relative == -1 ? -1 : relative + cast(ptrdiff_t) startIdx;
}
780
781 ///
782 @safe pure unittest
783 {
784 import std.typecons : No;
785
// Substring search with a start index; a start index past the end gives -1.
786 string s = "Hello World";
787 assert(indexOf(s, "Wo", 4) == 6);
788 assert(indexOf(s, "Zo", 100) == -1);
789 assert(indexOf(s, "wo", 3, No.caseSensitive) == 6);
790 }
791
792 ///
793 @safe pure unittest
794 {
795 import std.typecons : No;
796
// Found, not found, and a case-insensitive match.
797 string s = "Hello World";
798 assert(indexOf(s, "Wo") == 6);
799 assert(indexOf(s, "Zo") == -1);
800 assert(indexOf(s, "wO", No.caseSensitive) == 6);
801 }
802
// Overload for arguments that do not satisfy the forward-range constraint of
// the primary substring overload but have a StringTypeOf: forwards to the
// instantiation for that string type.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub,
        in CaseSensitive cs = Yes.caseSensitive)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    alias Str = StringTypeOf!Range;
    return indexOf!Str(s, sub, cs);
}
811
812 @safe pure unittest
813 {
// Substring search: wrapper and plain string must agree.
814 assert(testAliasedString!indexOf("std/string.d", "string"));
815 }
816
817 @safe pure unittest
818 {
819 import std.conv : to;
820 import std.exception : assertCTFEable;
821 import std.traits : EnumMembers;
822
// The whole body is run through assertCTFEable.
823 assertCTFEable!(
824 {
// Every combination of haystack width (S) and needle width (T).
825 foreach (S; AliasSeq!(string, wstring, dstring))
826 {
827 foreach (T; AliasSeq!(string, wstring, dstring))
828 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
829 assert(indexOf(cast(S) null, to!T("a")) == -1);
830 assert(indexOf(to!S("def"), to!T("a")) == -1);
831 assert(indexOf(to!S("abba"), to!T("a")) == 0);
832 assert(indexOf(to!S("def"), to!T("f")) == 2);
833 assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3);
834 assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6);
835
836 assert(indexOf(to!S("dfeffgfff"), to!T("a"), No.caseSensitive) == -1);
837 assert(indexOf(to!S("def"), to!T("a"), No.caseSensitive) == -1);
838 assert(indexOf(to!S("abba"), to!T("a"), No.caseSensitive) == 0);
839 assert(indexOf(to!S("def"), to!T("f"), No.caseSensitive) == 2);
840 assert(indexOf(to!S("dfefffg"), to!T("fff"), No.caseSensitive) == 3);
841 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), No.caseSensitive) == 6);
842
843 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
844 S sMars = "Who\'s \'My Favorite Maritian?\'";
845
846 assert(indexOf(sMars, to!T("MY fAVe"), No.caseSensitive) == -1);
847 assert(indexOf(sMars, to!T("mY fAVOriTe"), No.caseSensitive) == 7);
848 assert(indexOf(sPlts, to!T("mArS:"), No.caseSensitive) == 0);
849 assert(indexOf(sPlts, to!T("rOcK"), No.caseSensitive) == 17);
850 assert(indexOf(sPlts, to!T("Un."), No.caseSensitive) == 41);
851 assert(indexOf(sPlts, to!T(sPlts), No.caseSensitive) == 0);
852
853 assert(indexOf("\u0100", to!T("\u0100"), No.caseSensitive) == 0);
854
855 // Thanks to Carlos Santander B. and zwang
856 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
857 to!T("page-break-before"), No.caseSensitive) == -1);
858 }();
859
// Expected indices are code-unit offsets of the haystack's encoding.
860 foreach (cs; EnumMembers!CaseSensitive)
861 {
862 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9);
863 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7);
864 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6);
865 }
866 }
867 });
868 }
869
// Exercises the non-string (byWchar range) path of the substring search.
// The test itself is @nogc nothrow, so that path must be as well.
870 @safe pure @nogc nothrow
871 unittest
872 {
873 import std.traits : EnumMembers;
874 import std.utf : byWchar;
875
876 foreach (cs; EnumMembers!CaseSensitive)
877 {
// An empty haystack gives -1 even for an empty needle; an empty needle in
// a non-empty haystack gives 0.
878 assert(indexOf("".byWchar, "", cs) == -1);
879 assert(indexOf("hello".byWchar, "", cs) == 0);
880 assert(indexOf("hello".byWchar, "l", cs) == 2);
881 assert(indexOf("heLLo".byWchar, "LL", cs) == 2);
882 assert(indexOf("hello".byWchar, "lox", cs) == -1);
883 assert(indexOf("hello".byWchar, "betty", cs) == -1);
884 assert(indexOf("hello\U00010143\u0100*\U00010143".byWchar, "\u0100*", cs) == 7);
885 }
886 }
887
888 @safe pure unittest
889 {
890 import std.conv : to;
891 import std.traits : EnumMembers;
892
// Start-index overload for every combination of haystack width (S) and
// needle width (T).
893 foreach (S; AliasSeq!(string, wstring, dstring))
894 {
895 foreach (T; AliasSeq!(string, wstring, dstring))
896 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
897 assert(indexOf(cast(S) null, to!T("a"), 1337) == -1);
898 assert(indexOf(to!S("def"), to!T("a"), 0) == -1);
899 assert(indexOf(to!S("abba"), to!T("a"), 2) == 3);
900 assert(indexOf(to!S("def"), to!T("f"), 1) == 2);
901 assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3);
902 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6);
903
904 assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, No.caseSensitive) == -1);
905 assert(indexOf(to!S("def"), to!T("a"), 2, No.caseSensitive) == -1);
906 assert(indexOf(to!S("abba"), to!T("a"), 3, No.caseSensitive) == 3);
907 assert(indexOf(to!S("def"), to!T("f"), 1, No.caseSensitive) == 2);
908 assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, No.caseSensitive) == 3);
909 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, No.caseSensitive) == 6);
// On failure the message reports the actual index and the S/T pair.
910 assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive) == 9,
911 to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive))
912 ~ " " ~ S.stringof ~ " " ~ T.stringof);
913
914 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
915 S sMars = "Who\'s \'My Favorite Maritian?\'";
916
917 assert(indexOf(sMars, to!T("MY fAVe"), 10,
918 No.caseSensitive) == -1);
919 assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, No.caseSensitive) == 7);
920 assert(indexOf(sPlts, to!T("mArS:"), 0, No.caseSensitive) == 0);
921 assert(indexOf(sPlts, to!T("rOcK"), 12, No.caseSensitive) == 17);
922 assert(indexOf(sPlts, to!T("Un."), 32, No.caseSensitive) == 41);
923 assert(indexOf(sPlts, to!T(sPlts), 0, No.caseSensitive) == 0);
924
925 assert(indexOf("\u0100", to!T("\u0100"), 0, No.caseSensitive) == 0);
926
927 // Thanks to Carlos Santander B. and zwang
928 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
929 to!T("page-break-before"), 10, No.caseSensitive) == -1);
930
931 // In order for indexOf with and without index to be consistent
932 assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0));
933 }();
934
// The returned index is relative to the start of the whole string.
935 foreach (cs; EnumMembers!CaseSensitive)
936 {
937 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"),
938 3, cs) == 9);
939 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"),
940 3, cs) == 7);
941 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"),
942 3, cs) == 6);
943 }
944 }
945 }
946
/++
    Finds the last occurrence of a character in a string.

    Params:
        s = string to search
        c = character to search for
        startIdx = the index into s to start searching from
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        The index of the last occurrence of $(D c) in $(D s). If $(D c) is not
        found, then $(D -1) is returned. The $(D startIdx) slices $(D s) in
        the following way $(D s[0 .. startIdx]). $(D startIdx) represents a
        codeunit index in $(D s).

    Throws:
        If the sequence ending at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    $(D cs) indicates whether the comparisons are case sensitive.
+/
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    static import std.ascii, std.uni;
    import std.utf : canSearchInCodeUnits;

    immutable bool exact = cs == Yes.caseSensitive;

    if (exact && canSearchInCodeUnits!Char(c))
    {
        // c can be compared against single code units: scan them backwards.
        foreach_reverse (idx, unit; s)
        {
            if (unit == c)
                return idx;
        }
        return -1;
    }

    if (exact)
    {
        // Otherwise decode code points while walking backwards.
        foreach_reverse (idx, dchar point; s)
        {
            if (point == c)
                return idx;
        }
        return -1;
    }

    if (std.ascii.isASCII(c))
    {
        // Case-insensitive ASCII needle: compare lower-cased code units.
        immutable asciiTarget = std.ascii.toLower(c);
        foreach_reverse (idx, unit; s)
        {
            immutable folded = std.ascii.toLower(unit);
            if (asciiTarget == folded)
                return idx;
        }
        return -1;
    }

    // Case-insensitive non-ASCII needle: compare lower-cased code points.
    immutable uniTarget = std.uni.toLower(c);
    foreach_reverse (idx, dchar point; s)
    {
        immutable folded = std.uni.toLower(point);
        if (uniTarget == folded)
            return idx;
    }
    return -1;
}
1027
/// Ditto
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    // A start index beyond the end of s is reported as "not found".
    if (startIdx > s.length)
        return -1;

    // Only the prefix s[0 .. startIdx] is searched.
    return lastIndexOf(s[0 .. startIdx], c, cs);
}
1040
1041 ///
1042 @safe pure unittest
1043 {
1044 import std.typecons : No;
1045
// Found, not found, and a case-insensitive match.
1046 string s = "Hello World";
1047 assert(lastIndexOf(s, 'l') == 9);
1048 assert(lastIndexOf(s, 'Z') == -1);
1049 assert(lastIndexOf(s, 'L', No.caseSensitive) == 9);
1050 }
1051
1052 ///
1053 @safe pure unittest
1054 {
1055 import std.typecons : No;
1056
// Only s[0 .. startIdx] is searched; a start index past the end gives -1.
1057 string s = "Hello World";
1058 assert(lastIndexOf(s, 'l', 4) == 3);
1059 assert(lastIndexOf(s, 'Z', 1337) == -1);
1060 assert(lastIndexOf(s, 'L', 7, No.caseSensitive) == 3);
1061 }
1062
1063 @safe pure unittest
1064 {
1065 import std.conv : to;
1066 import std.exception : assertCTFEable;
1067 import std.traits : EnumMembers;
1068
// The whole body is run through assertCTFEable.
1069 assertCTFEable!(
1070 {
// Basic searches for every string width, case-sensitive then insensitive.
1071 foreach (S; AliasSeq!(string, wstring, dstring))
1072 {
1073 assert(lastIndexOf(cast(S) null, 'a') == -1);
1074 assert(lastIndexOf(to!S("def"), 'a') == -1);
1075 assert(lastIndexOf(to!S("abba"), 'a') == 3);
1076 assert(lastIndexOf(to!S("def"), 'f') == 2);
1077 assert(lastIndexOf(to!S("ödef"), 'ö') == 0);
1078
1079 assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
1080 assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
1081 assert(lastIndexOf(to!S("AbbA"), 'a', No.caseSensitive) == 3);
1082 assert(lastIndexOf(to!S("def"), 'F', No.caseSensitive) == 2);
1083 assert(lastIndexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);
1084 assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"),
1085 No.caseSensitive) == 1);
1086
1087 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
1088
1089 assert(lastIndexOf(to!S("def"), 'f', No.caseSensitive) == 2);
1090 assert(lastIndexOf(sPlts, 'M', No.caseSensitive) == 34);
1091 assert(lastIndexOf(sPlts, 'S', No.caseSensitive) == 40);
1092 }
1093
// Expected indices are code-unit offsets of each encoding.
1094 foreach (cs; EnumMembers!CaseSensitive)
1095 {
1096 assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
1097 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
1098 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
1099 }
1100 });
1101 }
1102
// Checks the character overload of lastIndexOf that takes a start index,
// for string, wstring and dstring haystacks.
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        assert(lastIndexOf(cast(S) null, 'a') == -1);
        assert(lastIndexOf(to!S("def"), 'a') == -1);
        // The start index is exclusive: only s[0 .. 3] is searched.
        assert(lastIndexOf(to!S("abba"), 'a', 3) == 0);
        assert(lastIndexOf(to!S("deff"), 'f', 3) == 2);

        assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
        // The start index is passed as a ushort here; the failure message
        // repeats the call with the literal 4.
        assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), No.caseSensitive) == 3,
                to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, No.caseSensitive)));
        assert(lastIndexOf(to!S("def"), 'F', 3, No.caseSensitive) == 2);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

        // A start index larger than the string length yields -1.
        assert(lastIndexOf(to!S("def"), 'f', 4, No.caseSensitive) == -1);
        assert(lastIndexOf(sPlts, 'M', sPlts.length -2, No.caseSensitive) == 34);
        assert(lastIndexOf(sPlts, 'S', sPlts.length -2, No.caseSensitive) == 40);
    }

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // Expected indices are code-unit offsets and differ per encoding.
        assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
    }
}
1135
/++
    Finds the last occurrence of a substring in a string.

    Params:
        s = string to search
        sub = substring to search for
        startIdx = the index into s to start searching from
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        the index of the last occurrence of $(D sub) in $(D s). If $(D sub) is
        empty or is not found, then $(D -1) is returned. The $(D startIdx)
        slices $(D s) in the following way $(D s[0 .. startIdx]).
        $(D startIdx) represents a codeunit index in $(D s).

    Throws:
        If the sequence ending at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    $(D cs) indicates whether the comparisons are case sensitive.
  +/
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    import std.algorithm.searching : endsWith;
    import std.conv : to;
    import std.range.primitives : walkLength;
    static import std.uni;
    import std.utf : strideBack;

    // An empty needle is never reported as found.
    if (sub.empty)
        return -1;

    // A needle of exactly one code point is handled by the character overload.
    if (walkLength(sub) == 1)
        return lastIndexOf(s, sub.front, cs);

    if (cs == Yes.caseSensitive)
    {
        static if (is(Unqual!Char1 == Unqual!Char2))
        {
            // Same code-unit type: compare raw code units, scanning the
            // candidate start positions from the back.
            import core.stdc.string : memcmp;

            immutable c = sub[0];

            // If sub is longer than s the unsigned subtraction wraps and the
            // conversion to ptrdiff_t yields a negative value, so the loop
            // body never runs.
            for (ptrdiff_t i = s.length - sub.length; i >= 0; --i)
            {
                if (s[i] == c)
                {
                    if (__ctfe)
                    {
                        // memcmp is not usable during CTFE; compare slices.
                        // The previous element-wise loop used `continue`,
                        // which only continued the inner loop, so every
                        // position whose first code unit matched was
                        // returned even when the rest of sub did not match.
                        if (s[i + 1 .. i + sub.length] == sub[1 .. $])
                            return i;
                    }
                    else
                    {
                        auto trustedMemcmp(in void* s1, in void* s2, size_t n) @trusted
                        {
                            return memcmp(s1, s2, n);
                        }
                        // sub has at least two code units here, so both
                        // &s[i + 1] and &sub[1] are in bounds.
                        if (trustedMemcmp(&s[i + 1], &sub[1],
                                (sub.length - 1) * Char1.sizeof) == 0)
                            return i;
                    }
                }
            }
        }
        else
        {
            // Different code-unit types: repeatedly test whether the
            // remaining prefix of s ends with sub, dropping one code point
            // from the end each round.
            for (size_t i = s.length; !s.empty;)
            {
                if (s.endsWith(sub))
                    // Subtract the length sub has in s's encoding.
                    return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length;

                i -= strideBack(s, i);
                s = s[0 .. i];
            }
        }
    }
    else
    {
        // Case-insensitive: same shrinking-prefix scan, comparing the
        // lower-cased code points.
        for (size_t i = s.length; !s.empty;)
        {
            if (endsWith!((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
                         (s, sub))
            {
                return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length;
            }

            i -= strideBack(s, i);
            s = s[0 .. i];
        }
    }

    return -1;
}
1233
/// Ditto
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // A start index past the end of s cannot be used to slice it.
    if (startIdx > s.length)
        return -1;

    // Search only the prefix s[0 .. startIdx].
    return lastIndexOf(s[0u .. startIdx], sub, cs);
}
1246
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // "ll" starts at index 2.
    assert(lastIndexOf(s, "ll") == 2);
    // A substring that does not occur yields -1.
    assert(lastIndexOf(s, "Zo") == -1);
    // Case-insensitive search: "lL" matches "ll" at index 2.
    assert(lastIndexOf(s, "lL", No.caseSensitive) == 2);
}
1257
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // Only s[0 .. 4] ("Hell") is searched; "ll" starts at index 2.
    assert(lastIndexOf(s, "ll", 4) == 2);
    // A start index beyond the end of the string yields -1.
    assert(lastIndexOf(s, "Zo", 128) == -1);
    // s[0 .. 3] ("Hel") does not contain the whole substring.
    assert(lastIndexOf(s, "lL", 3, No.caseSensitive) == -1);
}
1268
// Empty haystack and/or empty needle: lastIndexOf is expected to return -1
// in every combination.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // Empty haystack.
        auto r = to!S("").lastIndexOf("hello");
        assert(r == -1, to!string(r));

        // Empty needle.
        r = to!S("hello").lastIndexOf("");
        assert(r == -1, to!string(r));

        // Both empty.
        r = to!S("").lastIndexOf("");
        assert(r == -1, to!string(r));
    }
}
1285
// Checks the substring overload of lastIndexOf for every combination of
// string/wstring/dstring haystack (S) and needle (T). The body is wrapped in
// assertCTFEable (presumably so the same assertions are also evaluated during
// CTFE -- confirm in std.exception).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Identifies the failing S/T combination in assertion messages.
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive searches (default cs).
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr);

            // Case-insensitive searches.
            assert(lastIndexOf(cast(S) null, to!T("a"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), No.caseSensitive) == 0, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), No.caseSensitive) == 7, typeStr);

            assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), Yes.caseSensitive) == 0);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            // Longer, mixed-case needles.
            assert(lastIndexOf(sMars, to!T("RiTE maR"), No.caseSensitive) == 14, typeStr);
            assert(lastIndexOf(sPlts, to!T("FOuRTh"), No.caseSensitive) == 10, typeStr);
            assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), No.caseSensitive) == 0, typeStr);
            assert(lastIndexOf(sMars, to!T(sMars), No.caseSensitive) == 0, typeStr);
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            // Expected indices are code-unit offsets and differ per encoding
            // of the haystack literal.
            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString);
        }
    }
    });
}
1345
// Regression test for issue 13529: needles containing non-ASCII characters,
// for every combination of haystack type T and needle type S.
@safe pure unittest // issue13529
{
    import std.conv : to;
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        {
            enum typeStr = S.stringof ~ " " ~ T.stringof;
            // The haystack ends with "ö ö", so the needle must be found.
            auto idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö ö"));
            assert(idx != -1, to!string(idx) ~ " " ~ typeStr);

            // "ö öd" does not occur in the haystack.
            idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö öd"));
            assert(idx == -1, to!string(idx) ~ " " ~ typeStr);
        }
    }
}
1362
// Checks the substring overload of lastIndexOf that takes a start index, for
// every combination of string/wstring/dstring haystack (S) and needle (T).
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Identifies the failing S/T combination in assertion messages.
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive searches.
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~
                format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6)));
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == -1, typeStr);
            assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~
                    to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472

            // Case-insensitive searches.
            assert(lastIndexOf(cast(S) null, to!T("a"), 1, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, No.caseSensitive) == 2, typeStr);
            // The failure message repeats exactly the asserted call (start
            // index 4); it previously recomputed the result with index 3.
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive) == 2, typeStr ~
                " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive)));
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, No.caseSensitive) == -1, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, No.caseSensitive) == 3, typeStr);
            // Both overloads must agree for an empty haystack and needle.
            assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr);
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            // Expected indices are code-unit offsets and differ per encoding
            // of the haystack literal.
            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString);
        }
    }
}
1411
/*
 * Shared search routine behind indexOfAny, lastIndexOfAny, indexOfNeither
 * and lastIndexOfNeither.
 *
 * Template flags:
 *   forward = true scans haystack from the front, false from the back.
 *   any     = true looks for a character that is contained in needles,
 *             false for a character that is not contained in needles.
 *
 * Params:
 *   haystack = string that is searched
 *   needles  = set of characters to look for (or to avoid)
 *   cs       = whether characters are compared case sensitively
 *
 * Returns: the code-unit index in haystack of the character found, or -1 if
 * there is none.
 */
private ptrdiff_t indexOfAnyNeitherImpl(bool forward, bool any, Char, Char2)(
    const(Char)[] haystack, const(Char2)[] needles,
    in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    import std.algorithm.searching : canFind, findAmong;

    if (cs == Yes.caseSensitive)
    {
        static if (forward && any)
        {
            // findAmong returns the rest of haystack starting at the first
            // hit; its length gives the hit's offset from the end.
            auto remainder = findAmong(haystack, needles);
            return remainder.length ? haystack.length - remainder.length : -1;
        }
        else static if (forward)
        {
            foreach (pos, dchar ch; haystack)
            {
                if (!needles.canFind(ch))
                    return pos;
            }
        }
        else static if (any)
        {
            import std.range : retro;
            import std.utf : strideBack;

            // The un-reversed remainder ends right after the last hit.
            immutable upTo = retro(haystack).findAmong(needles).source.length;
            if (upTo != 0)
            {
                // Step back over the hit to get its starting code unit.
                return upTo - strideBack(haystack, upTo);
            }
        }
        else
        {
            foreach_reverse (pos, dchar ch; haystack)
            {
                if (!needles.canFind(ch))
                    return pos;
            }
        }
    }
    else
    {
        import std.range.primitives : walkLength;

        // Short, non-empty needle sets are lower-cased once into a stack
        // buffer so each haystack character needs only one toLower call.
        immutable bool useScratch =
            needles.length <= 16 && needles.walkLength(17) != 0;

        if (useScratch)
        {
            dchar[16] folded = void;
            size_t count = 0;
            foreach (dchar nd; needles)
                folded[count++] = toLower(nd);

            static if (forward)
            {
                foreach (pos, dchar ch; haystack)
                {
                    immutable hit = canFind(folded[0 .. count], toLower(ch));
                    if (hit == any)
                        return pos;
                }
            }
            else
            {
                foreach_reverse (pos, dchar ch; haystack)
                {
                    immutable hit = canFind(folded[0 .. count], toLower(ch));
                    if (hit == any)
                        return pos;
                }
            }
        }
        else
        {
            // General case: lower-case each needle on every comparison.
            static bool foldedEquals(dchar needle, dchar loweredHay)
            {
                return toLower(needle) == loweredHay;
            }

            static if (forward)
            {
                foreach (pos, dchar ch; haystack)
                {
                    immutable hit = canFind!foldedEquals(needles, toLower(ch));
                    if (hit == any)
                        return pos;
                }
            }
            else
            {
                foreach_reverse (pos, dchar ch; haystack)
                {
                    immutable hit = canFind!foldedEquals(needles, toLower(ch));
                    if (hit == any)
                        return pos;
                }
            }
        }
    }

    return -1;
}
1527
/**
    Returns the index of the first occurrence of any of the elements in $(D
    needles) in $(D haystack). If no element of $(D needles) is found,
    then $(D -1) is returned. The $(D startIdx) slices $(D haystack) in the
    following way $(D haystack[startIdx .. $]). $(D startIdx) represents a
    codeunit index in $(D haystack). If the sequence starting at $(D startIdx)
    does not represent a well formed codepoint, then a
    $(REF UTFException, std,utf) may be thrown.

    Params:
        haystack = String to search for needles in.
        needles = Characters to search for in haystack.
        startIdx = slices haystack like this $(D haystack[startIdx .. $]). If
            the startIdx is greater than or equal to the length of haystack,
            the function returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Scan from the front for a character that is one of the needles.
    enum bool scanForward = true;
    enum bool matchAny = true;
    return indexOfAnyNeitherImpl!(scanForward, matchAny)(haystack, needles, cs);
}
1551
/// Ditto
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Nothing can be found at or beyond the end of haystack.
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the hit back to an index into haystack.
    immutable ptrdiff_t relative = indexOfAny(haystack[startIdx .. $], needles, cs);
    if (relative == -1)
        return -1;

    return relative + cast(ptrdiff_t) startIdx;
}
1568
///
@safe pure unittest
{
    import std.conv : to;

    // 'W' (index 5) comes before the first 'r'.
    ptrdiff_t i = "helloWorld".indexOfAny("Wr");
    assert(i == 5);
    // The result is a code-unit index: in this UTF-8 literal 'ö' and 'ä'
    // each take two code units, so the first 'l' is at index 4.
    i = "öällo world".indexOfAny("lo ");
    assert(i == 4, to!string(i));
}
1579
///
@safe pure unittest
{
    import std.conv : to;

    // The search starts at index 4; the returned index is still relative to
    // the whole string.
    ptrdiff_t i = "helloWorld".indexOfAny("Wr", 4);
    assert(i == 5);

    // Code-unit index: 'ö' and 'ä' each take two UTF-8 code units.
    i = "Foo öällo world".indexOfAny("lh", 3);
    assert(i == 8, to!string(i));
}
1591
// Empty haystack and/or empty needle set: indexOfAny is expected to return
// -1 in every combination.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // Empty haystack.
        auto r = to!S("").indexOfAny("hello");
        assert(r == -1, to!string(r));

        // Empty needle set.
        r = to!S("hello").indexOfAny("");
        assert(r == -1, to!string(r));

        // Both empty.
        r = to!S("").indexOfAny("");
        assert(r == -1, to!string(r));
    }
}
1608
// Checks indexOfAny for every combination of string/wstring/dstring haystack
// (S) and needle set (T), wrapped in assertCTFEable (presumably so the same
// assertions are also evaluated during CTFE -- confirm in std.exception).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Case-sensitive searches.
            assert(indexOfAny(cast(S) null, to!T("a")) == -1);
            assert(indexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(indexOfAny(to!S("abba"), to!T("a")) == 0);
            assert(indexOfAny(to!S("def"), to!T("f")) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fgh")) == 1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("feg")) == 1);

            // Case-insensitive searches.
            assert(indexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 0);
            assert(indexOfAny(to!S("def"), to!T("FBI"), No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("NSA"), No.caseSensitive)
                == -1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BND"),
                No.caseSensitive) == 0);
            // A needle set with more than 16 characters.
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
                No.caseSensitive) == 0);

            assert(indexOfAny("\u0100", to!T("\u0100"), No.caseSensitive) == 0);
        }();
    }
    }
    );
}
1647
// Checks the indexOfAny overload that takes a start index, for every
// combination of string/wstring/dstring haystack (S) and needle set (T).
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Case-sensitive searches; a start index past the end yields -1.
            assert(indexOfAny(cast(S) null, to!T("a"), 1337) == -1);
            assert(indexOfAny(to!S("def"), to!T("AaF"), 0) == -1);
            assert(indexOfAny(to!S("abba"), to!T("NSa"), 2) == 3);
            assert(indexOfAny(to!S("def"), to!T("fbi"), 1) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("foo"), 2) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fsb"), 5) == 6);

            // Case-insensitive searches.
            assert(indexOfAny(to!S("dfeffgfff"), to!T("NDS"), 1,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("DRS"), 2,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("SI"), 3,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("deO"), to!T("ASIO"), 1,
                No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fbh"), 2,
                No.caseSensitive) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fEe"), 4,
                No.caseSensitive) == 4);
            assert(indexOfAny(to!S("dfeffgffföä"), to!T("föä"), 9,
                No.caseSensitive) == 9);

            assert(indexOfAny("\u0100", to!T("\u0100"), 0,
                No.caseSensitive) == 0);
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            // Expected indices are code-unit offsets and differ per encoding
            // of the haystack literal.
            assert(indexOfAny("hello\U00010143\u0100\U00010143",
                to!S("e\u0100"), 3, cs) == 9);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"w,
                to!S("h\u0100"), 3, cs) == 7);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"d,
                to!S("l\u0100"), 5, cs) == 6);
        }
    }
}
1694
/**
    Returns the index of the last occurrence of any of the elements in $(D
    needles) in $(D haystack). If no element of $(D needles) is found,
    then $(D -1) is returned. The $(D stopIdx) slices $(D haystack) in the
    following way $(D haystack[0 .. stopIdx]). $(D stopIdx) represents a
    codeunit index in $(D haystack). If the sequence ending at $(D stopIdx)
    does not represent a well formed codepoint, then a
    $(REF UTFException, std,utf) may be thrown.

    Params:
        haystack = String to search for needles in.
        needles = Characters to search for in haystack.
        stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]). If
            the stopIdx is greater than the length of haystack, the function
            returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Scan from the back for a character that is one of the needles.
    enum bool scanForward = false;
    enum bool matchAny = true;
    return indexOfAnyNeitherImpl!(scanForward, matchAny)(haystack, needles, cs);
}
1719
/// Ditto
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // A stop index past the end of haystack cannot be used to slice it.
    if (stopIdx > haystack.length)
        return -1;

    // Search only the prefix haystack[0 .. stopIdx].
    return lastIndexOfAny(haystack[0u .. stopIdx], needles, cs);
}
1733
///
@safe pure unittest
{
    // The last character that is one of 'W', 'l', 'o' is the 'l' at index 8.
    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo");
    assert(i == 8);

    // Code-unit index: the last 'ö' starts at UTF-8 code unit 8.
    i = "Foo öäöllo world".lastIndexOfAny("öF");
    assert(i == 8);
}
1743
///
@safe pure unittest
{
    import std.conv : to;

    // Only "hell" (haystack[0 .. 4]) is searched.
    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo", 4);
    assert(i == 3);

    // Only "Foo" (haystack[0 .. 3]) is searched, so 'F' at index 0 is found.
    i = "Foo öäöllo world".lastIndexOfAny("öF", 3);
    assert(i == 0);
}
1755
// Empty haystack and/or empty needle set: lastIndexOfAny is expected to
// return -1 in every combination.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // Empty haystack.
        auto r = to!S("").lastIndexOfAny("hello");
        assert(r == -1, to!string(r));

        // Empty needle set.
        r = to!S("hello").lastIndexOfAny("");
        assert(r == -1, to!string(r));

        // Both empty.
        r = to!S("").lastIndexOfAny("");
        assert(r == -1, to!string(r));
    }
}
1772
// Checks lastIndexOfAny for every combination of string/wstring/dstring
// haystack (S) and needle set (T), wrapped in assertCTFEable (presumably so
// the same assertions are also evaluated during CTFE -- confirm in
// std.exception).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Case-sensitive searches.
            assert(lastIndexOfAny(cast(S) null, to!T("a")) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("a")) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("f")) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("fgh")) == 6);

            // The expected code-unit index depends on the haystack encoding:
            // 9 for string, 8 for wstring and dstring.
            ptrdiff_t oeIdx = 9;
               if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 8;
            }

            auto foundOeIdx = lastIndexOfAny(to!S("dfeffgföf"), to!T("feg"));
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            // Case-insensitive searches.
            assert(lastIndexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("FBI"),
                No.caseSensitive) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("NSA"),
                No.caseSensitive) == -1);

            // Again encoding dependent: 2 for string, 1 for wstring and
            // dstring.
            oeIdx = 2;
               if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 1;
            }
            assert(lastIndexOfAny(to!S("ödfeffgfff"), to!T("BND"),
                No.caseSensitive) == oeIdx);

            assert(lastIndexOfAny("\u0100", to!T("\u0100"),
                No.caseSensitive) == 0);
        }();
    }
    }
    );
}
1825
// Checks the lastIndexOfAny overload that takes a stop index, for every
// combination of string/wstring/dstring haystack (S) and needle set (T),
// wrapped in assertCTFEable (presumably so the same assertions are also
// evaluated during CTFE -- confirm in std.exception).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Identifies the failing S/T combination in assertion messages.
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive searches; a stop index past the end yields -1.
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("c"), 7) == 6,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("cd"), 5) == 3,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("ef"), 6) == 5,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefCdef"), to!T("c"), 8) == 2,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("x"), 7) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("xy"), 4) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("öabcdefcdef"), to!T("ö"), 2) == 0,
                typeStr);

            // Case-insensitive searches.
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("C"), 7,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("cd"), 5,
                No.caseSensitive) == 3, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("EF"), 6,
                No.caseSensitive) == 5, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFcDEF"), to!T("C"), 8,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("x"), 7,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abCdefcdef"), to!T("XY"), 4,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("ÖABCDEFCDEF"), to!T("ö"), 2,
                No.caseSensitive) == 0, typeStr);
        }();
    }
    }
    );
}
1877
/**
    Returns the index of the first occurrence of any character in
    $(D haystack) that is not an element of $(D needles). If every element of
    $(D haystack) is an element of $(D needles), $(D -1) is returned.

    Params:
        haystack = String to search for needles in.
        needles = Characters to skip over in haystack.
        startIdx = slices haystack like this $(D haystack[startIdx .. $]). If
            the startIdx is greater than or equal to the length of haystack,
            the function returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Scan from the front for a character that is none of the needles.
    enum bool scanForward = true;
    enum bool matchAny = false;
    return indexOfAnyNeitherImpl!(scanForward, matchAny)(haystack, needles, cs);
}
1898
/// Ditto
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Nothing can be found at or beyond the end of haystack.
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the hit back to an index into haystack.
    immutable ptrdiff_t relative = indexOfAnyNeitherImpl!(true, false)(
        haystack[startIdx .. $], needles, cs);
    if (relative == -1)
        return -1;

    return relative + cast(ptrdiff_t) startIdx;
}
1917
///
@safe pure unittest
{
    // The search starts at startIdx; returned indices refer to the whole
    // string.
    assert(indexOfNeither("abba", "a", 2) == 2);
    assert(indexOfNeither("def", "de", 1) == 2);
    assert(indexOfNeither("dfefffg", "dfe", 4) == 6);
}
1925
///
@safe pure unittest
{
    // 'd' is not in "a", so the very first character is reported.
    assert(indexOfNeither("def", "a") == 0);
    // 'd' and 'e' are skipped; 'f' at index 2 is the first other character.
    assert(indexOfNeither("def", "de") == 2);
    // Only the trailing 'g' is not one of 'd', 'f', 'e'.
    assert(indexOfNeither("dfefffg", "dfe") == 6);
}
1933
// Empty haystack and/or empty needle set for indexOfNeither.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // Empty haystack: nothing to report.
        auto r = to!S("").indexOfNeither("hello");
        assert(r == -1, to!string(r));

        // Empty needle set: the first character already qualifies.
        r = to!S("hello").indexOfNeither("");
        assert(r == 0, to!string(r));

        // Both empty.
        r = to!S("").indexOfNeither("");
        assert(r == -1, to!string(r));
    }
}
1950
// Checks indexOfNeither for every combination of string/wstring/dstring
// haystack (S) and needle set (T), wrapped in assertCTFEable (presumably so
// the same assertions are also evaluated during CTFE -- confirm in
// std.exception).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            assert(indexOfNeither(cast(S) null, to!T("a")) == -1);
            assert(indexOfNeither("abba", "a") == 1);

            // Case-insensitive searches.
            assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"),
                No.caseSensitive) == 0);
            assert(indexOfNeither(to!S("def"), to!T("D"),
                No.caseSensitive) == 1);
            assert(indexOfNeither(to!S("ABca"), to!T("a"),
                No.caseSensitive) == 1);
            assert(indexOfNeither(to!S("def"), to!T("f"),
                No.caseSensitive) == 0);
            assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"),
                No.caseSensitive) == 6);
            // The expected code-unit index depends on the haystack encoding:
            // 8 for string, 7 for wstring and dstring.
            if (is(S == string))
            {
                assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive) == 8,
                    to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive)));
            }
            else
            {
                assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive) == 7,
                    to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive)));
            }
        }();
    }
    }
    );
}
1994
// Checks the indexOfNeither overload that takes a start index, for every
// combination of string/wstring/dstring haystack (S) and needle set (T),
// wrapped in assertCTFEable (presumably so the same assertions are also
// evaluated during CTFE -- confirm in std.exception).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            assert(indexOfNeither(cast(S) null, to!T("a"), 1) == -1);
            assert(indexOfNeither(to!S("def"), to!T("a"), 1) == 1,
                to!string(indexOfNeither(to!S("def"), to!T("a"), 1)));

            // Case-insensitive searches.
            assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 4,
                No.caseSensitive) == 4);
            assert(indexOfNeither(to!S("def"), to!T("D"), 2,
                No.caseSensitive) == 2);
            assert(indexOfNeither(to!S("ABca"), to!T("a"), 3,
                No.caseSensitive) == -1);
            assert(indexOfNeither(to!S("def"), to!T("tzf"), 2,
                No.caseSensitive) == -1);
            assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 5,
                No.caseSensitive) == 6);
            // startIdx is a code-unit index, so the same value 2 leads to a
            // different expected result for string (3) than for wstring and
            // dstring (2).
            if (is(S == string))
            {
                assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
                    No.caseSensitive) == 3, to!string(indexOfNeither(
                    to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
            }
            else
            {
                assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
                    No.caseSensitive) == 2, to!string(indexOfNeither(
                    to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
            }
        }();
    }
    }
    );
}
2037
/**
    Returns the index of the last occurrence of any character in
    $(D haystack) that is not an element of $(D needles). If every element of
    $(D haystack) is an element of $(D needles), $(D -1) is returned.

    Params:
        haystack = String to search for needles in.
        needles = Characters to skip over in haystack.
        stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]). If
            the stopIdx is greater than or equal to the length of haystack,
            the function returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Scan from the back for a character that is none of the needles.
    enum bool scanForward = false;
    enum bool matchAny = false;
    return indexOfAnyNeitherImpl!(scanForward, matchAny)(haystack, needles, cs);
}
2058
/// Ditto
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // As documented, a stop index equal to (or beyond) the length of
    // haystack yields -1 rather than a search of the whole string.
    if (stopIdx >= haystack.length)
        return -1;

    // Search only the prefix haystack[0 .. stopIdx], from the back.
    return indexOfAnyNeitherImpl!(false, false)(haystack[0 .. stopIdx],
        needles, cs);
}
2073
///
@safe pure unittest
{
    // The last character that is not 'a' is the second 'b' at index 2.
    assert(lastIndexOfNeither("abba", "a") == 2);
    // The last character that is not 'f' is 'e' at index 1.
    assert(lastIndexOfNeither("def", "f") == 1);
}
2080
///
@safe pure unittest
{
    // A stop index equal to the length of the haystack yields -1.
    assert(lastIndexOfNeither("def", "rsa", 3) == -1);
    // Only "ab" (haystack[0 .. 2]) is searched; 'b' is at index 1.
    assert(lastIndexOfNeither("abba", "a", 2) == 1);
}
2087
// Empty haystack and/or empty needle set for lastIndexOfNeither.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // Empty haystack: nothing to report.
        auto r = to!S("").lastIndexOfNeither("hello");
        assert(r == -1, to!string(r));

        // Empty needle set: the last character already qualifies.
        r = to!S("hello").lastIndexOfNeither("");
        assert(r == 4, to!string(r));

        // Both empty.
        r = to!S("").lastIndexOfNeither("");
        assert(r == -1, to!string(r));
    }
}
2104
// Checks lastIndexOfNeither for every combination of string/wstring/dstring
// haystack (S) and needle set (T), wrapped in assertCTFEable (presumably so
// the same assertions are also evaluated during CTFE -- confirm in
// std.exception).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Case-sensitive searches.
            assert(lastIndexOfNeither(cast(S) null, to!T("a")) == -1);
            assert(lastIndexOfNeither(to!S("def"), to!T("rsa")) == 2);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2);

            // The expected code-unit index depends on the haystack encoding:
            // 9 for string, 8 for wstring and dstring.
            ptrdiff_t oeIdx = 8;
               if (is(S == string))
            {
                oeIdx = 9;
            }

            auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"));
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            // Case-insensitive searches.
            assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"),
                No.caseSensitive) == 5);
            assert(lastIndexOfNeither(to!S("def"), to!T("MI6"),
                No.caseSensitive) == 2, to!string(lastIndexOfNeither(to!S("def"),
                to!T("MI6"), No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"),
                No.caseSensitive) == 6, to!string(lastIndexOfNeither(
                to!S("abbadeafsb"), to!T("fSb"), No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"),
                No.caseSensitive) == 1);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"),
                No.caseSensitive) == 6);
            // A needle set with more than 16 characters.
            assert(lastIndexOfNeither(to!S("dfeffgfffö"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
                No.caseSensitive) == 8, to!string(lastIndexOfNeither(to!S("dfeffgfffö"),
                to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), No.caseSensitive)));
        }();
    }
    }
    );
}
2149
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!(string, wstring, dstring))
        {
            foreach (T; AliasSeq!(string, wstring, dstring))
            (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
                alias hay = to!S;
                alias ndl = to!T;

                // A stop index far beyond a null haystack gives -1.
                assert(lastIndexOfNeither(cast(S) null, ndl("a"), 1337) == -1);
                assert(lastIndexOfNeither(hay("def"), ndl("f")) == 1);
                assert(lastIndexOfNeither(hay("dfefffg"), ndl("fgh")) == 2);

                // 'ö' is two code units wide in UTF-8 only.
                immutable ptrdiff_t oeIdx = is(S == string) ? 5 : 4;
                auto foundOeIdx = lastIndexOfNeither(hay("ödfefegff"), ndl("zeg"),
                    7);
                assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

                // Case-insensitive searches restricted by a stop index.
                assert(lastIndexOfNeither(hay("zfeffgfsb"), ndl("FSB"), 6,
                    No.caseSensitive) == 5);

                auto mi6 = lastIndexOfNeither(hay("def"), ndl("MI6"), 2,
                    No.caseSensitive);
                assert(mi6 == 1, to!string(mi6));

                auto fsb = lastIndexOfNeither(hay("abbadeafsb"), ndl("fSb"), 6,
                    No.caseSensitive);
                assert(fsb == 5, to!string(fsb));

                assert(lastIndexOfNeither(hay("defbi"), ndl("FBI"), 3,
                    No.caseSensitive) == 1);

                auto nsa = lastIndexOfNeither(hay("dfefffg"), ndl("NSA"), 2,
                    No.caseSensitive);
                assert(nsa == 1, to!string(nsa));
            }();
        }
    }
    );
}
2193
/**
 * Reinterprets a string as an array of unsigned integers of the same width
 * as its code units: $(D ubyte) for $(D char), $(D ushort) for $(D wchar)
 * and $(D uint) for $(D dchar). Type qualifiers of the element type are
 * carried over, and no data is copied.
 *
 * Params:
 *     s = The string to return the _representation of.
 *
 * Returns:
 *     The _representation of the passed string.
 */
auto representation(Char)(Char[] s) @safe pure nothrow @nogc
if (isSomeChar!Char)
{
    import std.traits : ModifyTypePreservingTQ;

    // Code unit sizes 1, 2 and 4 map to indices 0, 1 and 2 via sizeof / 2.
    alias Widths = AliasSeq!(ubyte, ushort, uint);
    alias ToRepType(T) = Widths[T.sizeof / 2];
    alias Rep = ModifyTypePreservingTQ!(ToRepType, Char);

    return cast(Rep[]) s;
}
2212
///
@safe pure unittest
{
    string s = "hello";
    auto bytes = representation(s);

    // Same memory, viewed as immutable bytes.
    static assert(is(typeof(bytes) == immutable(ubyte)[]));
    assert(bytes is cast(immutable(ubyte)[]) s);
    assert(bytes == [0x68, 0x65, 0x6c, 0x6c, 0x6f]);
}
2221
@system pure unittest
{
    import std.exception : assertCTFEable;
    import std.traits : Fields;
    import std.typecons : Tuple;

    assertCTFEable!(
    {
        // Checks both the static result type and that the data is shared.
        void check(Char, T)(Char[] str)
        {
            static assert(is(typeof(representation(str)) == T[]));
            assert(representation(str) is cast(T[]) str);
        }

        // Each pair couples a character type with its integer counterpart.
        foreach (Pair; AliasSeq!(Tuple!(char , ubyte ),
                                 Tuple!(wchar, ushort),
                                 Tuple!(dchar, uint  )))
        {
            alias Char = Fields!Pair[0];
            alias Int  = Fields!Pair[1];
            enum immutable(Char)[] greeting = "hello";

            check!(   immutable Char,    immutable Int)(greeting);
            check!(       const Char,        const Int)(greeting);
            check!(             Char,              Int)(greeting.dup);
            check!(      shared Char,       shared Int)(cast(shared) greeting.dup);
            check!(const shared Char, const shared Int)(greeting);
        }
    });
}
2252
2253
/**
 * Capitalize the first character of $(D input) and convert the rest of
 * $(D input) to lowercase.
 *
 * Params:
 *     input = The string to _capitalize.
 *
 * Returns:
 *     The capitalized string.
 *
 * See_Also:
 *      $(REF asCapitalized, std,uni) for a lazy range version that doesn't allocate memory
 */
S capitalize(S)(S input) @trusted pure
if (isSomeString!S)
{
    import std.array : array;
    import std.uni : asCapitalized;
    import std.utf : byUTF;

    // Re-encode the lazily capitalized code points with the code unit type
    // of S, then materialise them into a new array.
    alias CodeUnit = ElementEncodingType!(S);
    auto capitalized = asCapitalized(input);
    auto reencoded = byUTF!CodeUnit(capitalized);
    return array(reencoded);
}
2276
///
pure @safe unittest
{
    // The first letter is upper-cased ...
    immutable fromLower = capitalize("hello");
    assert(fromLower == "Hello");

    // ... and an already capitalized word comes back equal.
    immutable alreadyCapital = capitalize("World");
    assert(alreadyCapital == "World");
}
2283
// Overload for types that are not strings themselves but convert to one:
// forwards to the string implementation using the underlying string type.
auto capitalize(S)(auto ref S s)
if (!isSomeString!S && is(StringTypeOf!S))
{
    alias Str = StringTypeOf!S;
    return capitalize!Str(s);
}

@safe pure unittest
{
    // The aliased-string helper must accept the forwarding overload.
    immutable ok = testAliasedString!capitalize("hello");
    assert(ok);
}
2294
@safe pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
        {
            S source = to!S("FoL");
            S result;

            // The result is always a fresh array, never the input itself.
            result = capitalize(source);
            assert(cmp(result, "Fol") == 0);
            assert(result !is source);

            result = capitalize(source[0 .. 2]);
            assert(cmp(result, "Fo") == 0);

            source = to!S("fOl");
            result = capitalize(source);
            assert(cmp(result, "Fol") == 0);
            assert(result !is source);

            // Dotless i and capital I with dot above.
            source = to!S("\u0131 \u0130");
            result = capitalize(source);
            assert(cmp(result, "\u0049 i\u0307") == 0);
            assert(result !is source);

            // Long s followed by capital I.
            source = to!S("\u017F \u0049");
            result = capitalize(source);
            assert(cmp(result, "\u0053 \u0069") == 0);
            assert(result !is source);
        }
    });
}
2331
/++
    Split $(D s) into an array of lines according to the unicode standard using
    $(D '\r'), $(D '\n'), $(D "\r\n"), $(REF lineSep, std,uni),
    $(REF paraSep, std,uni), $(D U+0085) (NEL), $(D '\v')  and $(D '\f')
    as delimiters. If $(D keepTerm) is set to $(D Yes.keepTerminator), then
    the delimiter is included in the strings returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Allocates memory; use $(LREF lineSplitter) for an alternative that
    does not.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

  Params:
    s = a string of $(D chars), $(D wchars), or $(D dchars), or any custom
        type that casts to a $(D string) type
    keepTerm = whether delimiter is included or not in the results
  Returns:
    array of strings, each element is a line that is a slice of $(D s)
  See_Also:
    $(LREF lineSplitter)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 +/
alias KeepTerminator = Flag!"keepTerminator";

/// ditto
S[] splitLines(S)(S s, in KeepTerminator keepTerm = No.keepTerminator) @safe pure
if (isSomeString!S)
{
    import std.array : appender;
    import std.uni : lineSep, paraSep;

    immutable bool keep = keepTerm == Yes.keepTerminator;
    auto lines = appender!(S[])();
    size_t lineStart = 0;
    size_t pos = 0;

    while (pos < s.length)
    {
        // Number of code units in the terminator starting at pos,
        // or 0 when no terminator starts here.
        size_t termLen = 0;

        switch (s[pos])
        {
            case '\v', '\f', '\n':
                termLen = 1;
                break;

            case '\r':
                // "\r\n" counts as a single two-unit terminator.
                termLen = (pos + 1 < s.length && s[pos + 1] == '\n') ? 2 : 1;
                break;

            static if (s[pos].sizeof == 1)
            {
                /* Manually decode:
                 *  lineSep is E2 80 A8
                 *  paraSep is E2 80 A9
                 */
                case 0xE2:
                    if (pos + 2 < s.length &&
                        s[pos + 1] == 0x80 &&
                        (s[pos + 2] == 0xA8 || s[pos + 2] == 0xA9))
                    {
                        termLen = 3;
                    }
                    break;

                /* Manually decode:
                 *  NEL is C2 85
                 */
                case 0xC2:
                    if (pos + 1 < s.length && s[pos + 1] == 0x85)
                        termLen = 2;
                    break;
            }
            else
            {
                case lineSep:
                case paraSep:
                case '\u0085':
                    termLen = 1;
                    break;
            }

            default:
                break;
        }

        if (termLen == 0)
        {
            ++pos;
            continue;
        }

        // Emit the finished line, with or without its terminator, and
        // resume scanning right behind the terminator.
        immutable lineEnd = keep ? pos + termLen : pos;
        lines.put(s[lineStart .. lineEnd]);
        pos += termLen;
        lineStart = pos;
    }

    // Trailing text that is not followed by a terminator is the last line.
    if (lineStart != s.length)
        lines.put(s[lineStart .. $]);

    return lines.data;
}
2443
///
@safe pure nothrow unittest
{
    string s = "Hello\nmy\rname\nis";
    auto lines = splitLines(s);
    assert(lines == ["Hello", "my", "name", "is"]);
}

@safe pure nothrow unittest
{
    // C2 86 is not NEL (C2 85), so nothing is split.
    string s = "a\xC2\x86b";
    auto lines = splitLines(s);
    assert(lines == [s]);
}
2456
// Overload for types that are not strings themselves but convert to one:
// forwards to the string implementation using the underlying string type.
auto splitLines(S)(auto ref S s, in KeepTerminator keepTerm = No.keepTerminator)
if (!isSomeString!S && is(StringTypeOf!S))
{
    alias Str = StringTypeOf!S;
    return splitLines!Str(s, keepTerm);
}

@safe pure nothrow unittest
{
    // The aliased-string helper must accept the forwarding overload.
    immutable ok = testAliasedString!splitLines("hello\nworld");
    assert(ok);
}
2467
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            auto s = to!S(
                "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n" ~
                "mon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
            );

            // Expected lines with terminators stripped ...
            string[] stripped = [
                "", "peter", "", "paul", "jerry", "ice", "cream", "",
                "sunday", "mon\u2030day", "schadenfreude", "kindergarten",
                "", "cookies",
            ];
            // ... and with terminators kept.
            string[] kept = [
                "\r", "peter\n", "\r", "paul\r\n", "jerry\u2028", "ice\u2029",
                "cream\n", "\n", "sunday\n", "mon\u2030day\n",
                "schadenfreude\v", "kindergarten\f", "\v", "cookies\u0085",
            ];

            auto lines = splitLines(s);
            assert(lines.length == 14);
            foreach (idx, want; stripped)
                assert(lines[idx] == to!S(want));

            ubyte[] u = ['a', 0xFF, 0x12, 'b'];   // invalid UTF
            auto ulines = splitLines(cast(char[]) u);
            assert(cast(ubyte[])(ulines[0]) == u);

            lines = splitLines(s, Yes.keepTerminator);
            assert(lines.length == 14);
            foreach (idx, want; kept)
                assert(lines[idx] == to!S(want));

            s.popBack(); // Lop-off the trailing NEL (U+0085) terminator
            lines = splitLines(s);
            assert(lines.length == 14);
            assert(lines[9] == "mon\u2030day");

            lines = splitLines(s, Yes.keepTerminator);
            assert(lines.length == 14);
            assert(lines[13] == "cookies");
        }
    });
}
2531
// Lazy range over the lines of _input. The bounds of the current line are
// computed on the first call to front and cached until popFront is called.
private struct LineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)
{
    import std.conv : unsigned;
    import std.uni : lineSep, paraSep;
private:
    Range _input;

    alias IndexType = typeof(unsigned(_input.length));
    // Sentinel stored in iStart while the current line is not yet located.
    enum IndexType _unComputed = IndexType.max;
    IndexType iStart = _unComputed; // start of the current line
    IndexType iEnd = 0;             // end of the slice returned by front
    IndexType iNext = 0;            // where the scan for the next line begins

public:
    this(Range input)
    {
        _input = input;
    }

    static if (isInfinite!Range)
    {
        enum bool empty = false;
    }
    else
    {
        /// True once no line is cached and the whole input was consumed.
        @property bool empty()
        {
            immutable bool lineCached = iStart != _unComputed;
            return !lineCached && iNext == _input.length;
        }
    }

    /// The current line; locates its bounds on first access.
    @property typeof(_input) front()
    {
        if (iStart != _unComputed)
            return _input[iStart .. iEnd];

        iStart = iNext;
        IndexType pos = iNext;
        // Code units in the terminator found at pos; stays 0 when the
        // input ends without a terminator.
        IndexType termLen = 0;

        while (pos != _input.length)
        {
            switch (_input[pos])
            {
                case '\v', '\f', '\n':
                    termLen = 1;
                    break;

                case '\r':
                    // "\r\n" counts as a single two-unit terminator.
                    termLen = (pos + 1 < _input.length && _input[pos + 1] == '\n') ? 2 : 1;
                    break;

                static if (_input[pos].sizeof == 1)
                {
                    /* Manually decode:
                     *  lineSep is E2 80 A8
                     *  paraSep is E2 80 A9
                     */
                    case 0xE2:
                        if (pos + 2 < _input.length &&
                            _input[pos + 1] == 0x80 &&
                            (_input[pos + 2] == 0xA8 || _input[pos + 2] == 0xA9))
                        {
                            termLen = 3;
                        }
                        break;

                    /* Manually decode:
                     *  NEL is C2 85
                     */
                    case 0xC2:
                        if (pos + 1 < _input.length && _input[pos + 1] == 0x85)
                            termLen = 2;
                        break;
                }
                else
                {
                    case '\u0085':
                    case lineSep:
                    case paraSep:
                        termLen = 1;
                        break;
                }

                default:
                    break;
            }

            if (termLen != 0)
                break;
            ++pos;
        }

        // The returned slice includes the terminator only when requested;
        // the next scan always starts behind it.
        static if (keepTerm == Yes.keepTerminator)
            iEnd = pos + termLen;
        else
            iEnd = pos;
        iNext = pos + termLen;

        return _input[iStart .. iEnd];
    }

    /// Advances to the next line.
    void popFront()
    {
        if (iStart == _unComputed)
        {
            // front was never called for this line: compute it now so that
            // iNext moves past it.
            assert(!empty);
            cast(void) front;
        }
        iStart = _unComputed;
    }

    static if (isForwardRange!Range)
    {
        /// Independent copy of this range, including a saved input.
        @property typeof(this) save()
        {
            auto copy = this;
            copy._input = _input.save;
            return copy;
        }
    }
}
2663
/***********************************
 *  Split an array or slicable range of characters into a range of lines
    using $(D '\r'), $(D '\n'), $(D '\v'), $(D '\f'), $(D "\r\n"),
    $(REF lineSep, std,uni), $(REF paraSep, std,uni) and $(D '\u0085') (NEL)
    as delimiters. If $(D keepTerm) is set to $(D Yes.keepTerminator), then the
    delimiter is included in the slices returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

    Does not allocate memory.

  Params:
    r = array of $(D chars), $(D wchars), or $(D dchars) or a slicable range
    keepTerm = whether delimiter is included or not in the results
  Returns:
    range of slices of the input range $(D r)

  See_Also:
    $(LREF splitLines)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 */
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(Range r)
if ((hasSlicing!Range && hasLength!Range && isSomeChar!(ElementType!Range) ||
    isSomeString!Range) &&
    !isConvertibleToString!Range)
{
    // All the work happens lazily inside the range type.
    alias Splitter = LineSplitter!(keepTerm, Range);
    return Splitter(r);
}
2696
///
@safe pure unittest
{
    import std.array : array;

    string s = "Hello\nmy\rname\nis";

    /* lineSplitter is lazy; 'array' collects its lines so the result can
       be compared with the string[] returned by splitLines.
    */
    auto lazyLines = lineSplitter(s).array;
    assert(lazyLines == splitLines(s));
}
2709
// Overload for types that convert to a string: the splitter is built over
// the underlying string type.
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(auto ref Range r)
if (isConvertibleToString!Range)
{
    alias Splitter = LineSplitter!(keepTerm, StringTypeOf!Range);
    return Splitter(r);
}
2715
@safe pure unittest
{
    import std.array : array;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            auto s = to!S(
                "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\n" ~
                "sunday\nmon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
            );

            // Expected lines with terminators stripped ...
            string[] stripped = [
                "", "peter", "", "paul", "jerry", "ice", "cream", "",
                "sunday", "mon\u2030day", "schadenfreude", "kindergarten",
                "", "cookies",
            ];
            // ... and with terminators kept.
            string[] kept = [
                "\r", "peter\n", "\r", "paul\r\n", "jerry\u2028", "ice\u2029",
                "cream\n", "\n", "sunday\n", "mon\u2030day\n",
                "schadenfreude\v", "kindergarten\f", "\v", "cookies\u0085",
            ];

            auto lines = lineSplitter(s).array;
            assert(lines.length == 14);
            foreach (idx, want; stripped)
                assert(lines[idx] == to!S(want));

            ubyte[] u = ['a', 0xFF, 0x12, 'b'];   // invalid UTF
            auto ulines = lineSplitter(cast(char[]) u).array;
            assert(cast(ubyte[])(ulines[0]) == u);

            lines = lineSplitter!(Yes.keepTerminator)(s).array;
            assert(lines.length == 14);
            foreach (idx, want; kept)
                assert(lines[idx] == to!S(want));

            s.popBack(); // Lop-off the trailing NEL (U+0085) terminator
            lines = lineSplitter(s).array;
            assert(lines.length == 14);
            assert(lines[9] == "mon\u2030day");

            lines = lineSplitter!(Yes.keepTerminator)(s).array;
            assert(lines.length == 14);
            assert(lines[13] == "cookies");
        }
    });
}
2781
///
@nogc @safe pure unittest
{
    auto s = "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\nmon\u2030day\n";
    static immutable witness = ["", "peter", "", "paul", "jerry", "ice", "cream", "", "sunday", "mon\u2030day"];

    // Walk the lazy range and compare every line with the expected one.
    uint seen;
    foreach (line; s.lineSplitter())
    {
        assert(line == witness[seen]);
        ++seen;
    }
    assert(seen == witness.length);
}
2795
@nogc @safe pure unittest
{
    import std.algorithm.comparison : equal;

    // A string and its aliased wrapper must split into the same lines.
    auto plain = "std/string.d";
    auto wrapped = TestAliasedString(plain);
    assert(equal(plain.lineSplitter(), wrapped.lineSplitter()));
}

@safe pure unittest
{
    // A saved copy keeps its position while the original advances.
    auto text = "line1\nline2";
    auto advancing = text.lineSplitter!(Yes.keepTerminator);
    auto saved = advancing.save;
    advancing.popFront;
    assert(saved.front ~ advancing.front == text);

    // C2 86 is not NEL (C2 85), so the whole input is a single line.
    string notNel = "a\xC2\x86b";
    assert(notNel.lineSplitter.front == notNel);
}
2814
/++
    Strips leading whitespace (as defined by $(REF isWhite, std,uni)).

    Params:
        input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters

    Returns: $(D input) stripped of leading whitespace.

    Postconditions: $(D input) and the returned value
    will share the same tail (see $(REF sameTail, std,array)).

    See_Also:
        Generic stripping on ranges: $(REF _stripLeft, std, algorithm, mutation)
  +/
auto stripLeft(Range)(Range input)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isInfinite!Range && !isConvertibleToString!Range)
{
    static import std.ascii;
    static import std.uni;
    import std.utf : decodeFront;

    for (; !input.empty; )
    {
        auto unit = input.front;

        if (!std.ascii.isASCII(unit))
        {
            // decodeFront consumes the code point, so remember the position
            // in front of it in case it turns out not to be whitespace.
            auto beforeDecode = input.save;
            if (!std.uni.isWhite(decodeFront(input)))
                return beforeDecode;
            continue;
        }

        // ASCII fast path.
        if (!std.ascii.isWhite(unit))
            return input;
        input.popFront();
    }

    return input;
}
2857
///
@safe pure unittest
{
    import std.array : array;
    import std.uni : lineSep, paraSep;
    import std.utf : byChar;

    // ASCII whitespace is removed on the left only.
    assert(stripLeft(" hello world ") ==
           "hello world ");
    assert(stripLeft("\n\t\v\rhello world\n\t\v\r") ==
           "hello world\n\t\v\r");
    assert(stripLeft("hello world") ==
           "hello world");

    // Unicode line and paragraph separators count as whitespace too.
    assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) ==
           "hello world" ~ [lineSep]);
    assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) ==
           "hello world" ~ [paraSep]);

    // Works on character ranges as well.
    assert(stripLeft(" hello world "w.byChar).array ==
           "hello world ");
}
2878
// Overload for types that convert to a string: forwards to the
// implementation using the underlying string type.
auto stripLeft(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return stripLeft!Str(str);
}

@safe pure unittest
{
    // The aliased-string helper must accept the forwarding overload.
    immutable ok = testAliasedString!stripLeft(" hello");
    assert(ok);
}
2889
/++
    Strips trailing whitespace (as defined by $(REF isWhite, std,uni)).

    Params:
        str = string or random access range of characters

    Returns:
        slice of $(D str) stripped of trailing whitespace.

    See_Also:
        Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation)
  +/
auto stripRight(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    import std.uni : isWhite;
    alias C = Unqual!(ElementEncodingType!(typeof(str)));

    static if (isSomeString!(typeof(str)))
    {
        import std.utf : codeLength;

        // Decode backwards; the first non-white code point marks the end
        // of the result.
        foreach_reverse (i, dchar c; str)
        {
            if (isWhite(c))
                continue;
            return str[0 .. i + codeLength!C(c)];
        }

        return str[0 .. 0];
    }
    else
    {
        size_t idx = str.length;
        while (idx--)
        {
            static if (C.sizeof == 4)
            {
                // UTF-32: every element is a complete code point.
                if (!isWhite(str[idx]))
                    break;
            }
            else static if (C.sizeof == 2)
            {
                auto unit = str[idx];
                immutable bool isSurrogate = unit >= 0xD800 && unit < 0xE000;

                if (!isSurrogate)
                {
                    if (isWhite(unit))
                        continue;
                }
                else if (unit >= 0xDC00 && idx)
                {
                    // Trailing surrogate: combine it with a preceding
                    // leading surrogate, if there is one.
                    immutable lead = str[idx - 1];
                    if (lead >= 0xD800 && lead < 0xDC00)
                    {
                        immutable dchar cp = ((lead - 0xD7C0) << 10) + (unit - 0xDC00);
                        if (isWhite(cp))
                        {
                            --idx;
                            continue;
                        }
                    }
                }
                break;
            }
            else static if (C.sizeof == 1)
            {
                import std.utf : byDchar;

                char unit = str[idx];
                if (unit <= 0x7F)
                {
                    // ASCII fast path.
                    if (!isWhite(unit))
                        break;
                    continue;
                }

                // Walk back over at most four code units to the start of
                // the multi-byte sequence ending at idx.
                size_t seqLen = 1;
                for (;;)
                {
                    if (idx == 0 || (unit & 0xC0) == 0xC0 || seqLen == 4)
                        break;
                    unit = str[idx - 1];
                    if ((unit & 0x80) == 0)
                        break;
                    --idx;
                    ++seqLen;
                }

                auto sequence = str[idx .. idx + seqLen];
                if (!isWhite(sequence.byDchar.front))
                    return str[0 .. idx + seqLen];
            }
            else
                static assert(0);
        }

        // When the loop ran off the front, idx wrapped around to
        // size_t.max, so idx + 1 is 0 and the result is empty.
        return str[0 .. idx + 1];
    }
}
2997
///
@safe pure
unittest
{
    import std.uni : lineSep, paraSep;

    // ASCII whitespace is removed on the right only.
    assert(stripRight(" hello world ") ==
           " hello world");
    assert(stripRight("\n\t\v\rhello world\n\t\v\r") ==
           "\n\t\v\rhello world");
    assert(stripRight("hello world") ==
           "hello world");

    // Unicode line and paragraph separators count as whitespace too.
    assert(stripRight([lineSep] ~ "hello world" ~ lineSep) ==
           [lineSep] ~ "hello world");
    assert(stripRight([paraSep] ~ "hello world" ~ paraSep) ==
           [paraSep] ~ "hello world");
}
3014
// Overload for types that convert to a string: forwards to the
// implementation using the underlying string type.
auto stripRight(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return stripRight!Str(str);
}

@safe pure unittest
{
    // The aliased-string helper must accept the forwarding overload.
    immutable ok = testAliasedString!stripRight("hello ");
    assert(ok);
}
3025
@safe pure unittest
{
    import std.array : array;
    import std.uni : lineSep, paraSep;
    import std.utf : byChar, byDchar, byUTF, byWchar, invalidUTFstrings;

    // Range (non-string) inputs of every code unit width.
    assert(stripRight(" hello world ".byChar).array == " hello world");
    assert(stripRight("\n\t\v\rhello world\n\t\v\r"w.byWchar).array == "\n\t\v\rhello world"w);
    assert(stripRight("hello world"d.byDchar).array == "hello world"d);
    assert(stripRight("\u2028hello world\u2020\u2028".byChar).array == "\u2028hello world\u2020");
    assert(stripRight("hello world\U00010001"w.byWchar).array == "hello world\U00010001"w);

    // Invalid UTF only has to be processed without failing; the result
    // itself is not inspected.
    foreach (C; AliasSeq!(char, wchar, dchar))
    {
        foreach (bad; invalidUTFstrings!C())
        {
            cast(void) stripRight(bad.byUTF!C).array;
        }
    }

    // A lone continuation byte and a lone trailing surrogate.
    cast(void) stripRight("a\x80".byUTF!char).array;
    wstring loneSurrogate = ['a', cast(wchar) 0xDC00];
    cast(void) stripRight(loneSurrogate.byUTF!wchar).array;
}
3049
3050
/++
    Strips both leading and trailing whitespace (as defined by
    $(REF isWhite, std,uni)).

    Params:
        str = string or random access range of characters

    Returns:
        slice of $(D str) stripped of leading and trailing whitespace.

    See_Also:
        Generic stripping on ranges: $(REF _strip, std, algorithm, mutation)
  +/
auto strip(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    // Trim the front first, then the back of what remains.
    auto withoutLeading = stripLeft(str);
    return stripRight(withoutLeading);
}
3072
///
@safe pure unittest
{
    import std.uni : lineSep, paraSep;

    // ASCII whitespace is removed on both sides.
    assert(strip(" hello world ") ==
           "hello world");
    assert(strip("\n\t\v\rhello world\n\t\v\r") ==
           "hello world");
    assert(strip("hello world") ==
           "hello world");

    // Unicode line and paragraph separators count as whitespace too.
    assert(strip([lineSep] ~ "hello world" ~ [lineSep]) ==
           "hello world");
    assert(strip([paraSep] ~ "hello world" ~ [paraSep]) ==
           "hello world");
}
3088
// Overload for types that convert to a string: forwards to the
// implementation using the underlying string type.
auto strip(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return strip!Str(str);
}

@safe pure unittest
{
    // The aliased-string helper must accept the forwarding overload.
    immutable ok = testAliasedString!strip(" hello world ");
    assert(ok);
}
3099
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!( char[], const  char[],  string,
                              wchar[], const wchar[], wstring,
                              dchar[], const dchar[], dstring))
        {
            alias str = to!S;

            // Leading whitespace only.
            assert(equal(stripLeft(str(" foo\t ")), "foo\t "));
            assert(equal(stripLeft(str("\u2008 foo\t \u2007")), "foo\t \u2007"));
            assert(equal(stripLeft(str("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB \r"));
            assert(equal(stripLeft(str("1")), "1"));
            assert(equal(stripLeft(str("\U0010FFFE")), "\U0010FFFE"));
            assert(equal(stripLeft(str("")), ""));

            // Trailing whitespace only.
            assert(equal(stripRight(str(" foo\t ")), " foo"));
            assert(equal(stripRight(str("\u2008 foo\t \u2007")), "\u2008 foo"));
            assert(equal(stripRight(str("\u0085 μ \u0085 \u00BB \r")), "\u0085 μ \u0085 \u00BB"));
            assert(equal(stripRight(str("1")), "1"));
            assert(equal(stripRight(str("\U0010FFFE")), "\U0010FFFE"));
            assert(equal(stripRight(str("")), ""));

            // Both sides.
            assert(equal(strip(str(" foo\t ")), "foo"));
            assert(equal(strip(str("\u2008 foo\t \u2007")), "foo"));
            assert(equal(strip(str("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB"));
            assert(equal(strip(str("\U0010FFFE")), "\U0010FFFE"));
            assert(equal(strip(str("")), ""));
        }
    });
}
3134
@safe pure unittest
{
    import std.array : sameHead, sameTail;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Stripping an all-whitespace string must still yield a slice
        // anchored at the matching end of the original.
        wstring blank = " ";
        auto leftStripped = blank.stripLeft();
        auto rightStripped = blank.stripRight();
        assert(blank.sameTail(leftStripped));
        assert(blank.sameHead(rightStripped));
    });
}
3146
3147
/++
    If $(D str) ends with $(D delimiter), then $(D str) is returned without
    $(D delimiter) on its end. If $(D str) does $(I not) end with
    $(D delimiter), then it is returned unchanged.

    If no $(D delimiter) is given, then one trailing  $(D '\r'), $(D '\n'),
    $(D "\r\n"), $(D '\f'), $(D '\v'), $(REF lineSep, std,uni), $(REF paraSep, std,uni), or $(REF nelSep, std,uni)
    is removed from the end of $(D str). If $(D str) does not end with any of those characters,
    then it is returned unchanged.

    Params:
        str = string or indexable range of characters
        delimiter = string of characters to be sliced off end of str[]

    Returns:
        slice of str
  +/
Range chomp(Range)(Range str)
if ((isRandomAccessRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;

    if (str.empty)
        return str;

    alias C = ElementEncodingType!Range;

    // Number of trailing code units to remove; 0 means "leave unchanged".
    size_t drop = 0;

    switch (str[$ - 1])
    {
        case '\n':
            // "\r\n" is removed as a unit.
            drop = (str.length > 1 && str[$ - 2] == '\r') ? 2 : 1;
            break;

        case '\r', '\v', '\f':
            drop = 1;
            break;

        // Pop off the last character if lineSep, paraSep, or nelSep
        static if (is(C : const char))
        {
            /* Manually decode:
             *  lineSep is E2 80 A8
             *  paraSep is E2 80 A9
             */
            case 0xA8: // Last byte of lineSep
            case 0xA9: // Last byte of paraSep
                if (str.length > 2 && str[$ - 2] == 0x80 && str[$ - 3] == 0xE2)
                    drop = 3;
                break;

            /* Manually decode:
             *  NEL is C2 85
             */
            case 0x85:
                if (str.length > 1 && str[$ - 2] == 0xC2)
                    drop = 2;
                break;
        }
        else
        {
            case lineSep:
            case paraSep:
            case nelSep:
                drop = 1;
                break;
        }

        default:
            break;
    }

    if (drop == 0)
        return str;
    return str[0 .. $ - drop];
}
3219
/// Ditto
Range chomp(Range, C2)(Range str, const(C2)[] delimiter)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    // An empty delimiter means "strip one line terminator".
    if (!delimiter.length)
        return chomp(str);

    alias C1 = ElementEncodingType!Range;

    enum bool sameCodeUnit = is(Unqual!C1 == Unqual!C2);
    enum bool sliceable = isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4);

    static if (sameCodeUnit && sliceable)
    {
        // Same encoding on both sides: compare and slice directly.
        import std.algorithm.searching : endsWith;

        if (!str.endsWith(delimiter))
            return str;
        return str[0 .. $ - delimiter.length];
    }
    else
    {
        // Returned untouched if the delimiter does not match completely.
        auto untouched = str.save;

        static if (isSomeString!Range)
            alias Elem = dchar; // because strings auto-decode
        else
            alias Elem = C1;    // and ranges do not

        // Match the delimiter back to front, consuming str as we go.
        foreach_reverse (Elem expected; delimiter)
        {
            if (!str.empty && str.back == expected)
            {
                str.popBack();
                continue;
            }
            return untouched;
        }

        return str;
    }
}
3259
///
@safe pure
unittest
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : decode;

    // Without a delimiter exactly one trailing line terminator is removed.
    assert(chomp(" hello world \n\r") == " hello world \n");
    assert(chomp(" hello world \r\n") == " hello world ");
    assert(chomp(" hello world \f") == " hello world ");
    assert(chomp(" hello world \v") == " hello world ");
    assert(chomp(" hello world \n\n") == " hello world \n");
    assert(chomp(" hello world \n\n ") == " hello world \n\n ");
    assert(chomp(" hello world \n\n" ~ [lineSep]) == " hello world \n\n");
    assert(chomp(" hello world \n\n" ~ [paraSep]) == " hello world \n\n");
    assert(chomp(" hello world \n\n" ~ [ nelSep]) == " hello world \n\n");
    assert(chomp(" hello world") == " hello world");
    assert(chomp("") == "");

    // With a delimiter only that exact suffix is removed.
    assert(chomp(" hello world", "orld") == " hello w");
    assert(chomp(" hello world", " he") == " hello world");
    assert(chomp("", "hello") == "");

    // Don't decode pointlessly
    assert(chomp("hello\xFE", "\r") == "hello\xFE");
}
3285
// Overloads for types that implicitly convert to a string: forward to the
// string implementation, instantiated explicitly with the converted type.
StringTypeOf!Range chomp(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return chomp!Str(str);
}

StringTypeOf!Range chomp(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return chomp!(Str, C2)(str, delimiter);
}

@safe pure unittest
{
    // with an explicit delimiter
    assert(testAliasedString!chomp(" hello world", "orld"));
    // without one
    assert(testAliasedString!chomp(" hello world \n\r"));
}
3303
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    string s;

    assertCTFEable!(
    {
    foreach (Str; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        // @@@ BUG IN COMPILER, MUST INSERT CAST
        assert(chomp(cast(Str) null) is null);

        // ASCII terminators
        assert(chomp(to!Str("hello")) == "hello");
        assert(chomp(to!Str("hello\n")) == "hello");
        assert(chomp(to!Str("hello\r")) == "hello");
        assert(chomp(to!Str("hello\r\n")) == "hello");
        assert(chomp(to!Str("hello\n\r")) == "hello\n");
        assert(chomp(to!Str("hello\n\n")) == "hello\n");
        assert(chomp(to!Str("hello\r\r")) == "hello\r");
        assert(chomp(to!Str("hello\nxxx\n")) == "hello\nxxx");

        // Unicode terminators
        assert(chomp(to!Str("hello\u2028")) == "hello");
        assert(chomp(to!Str("hello\u2029")) == "hello");
        assert(chomp(to!Str("hello\u0085")) == "hello");
        assert(chomp(to!Str("hello\u2028\u2028")) == "hello\u2028");
        assert(chomp(to!Str("hello\u2029\u2029")) == "hello\u2029");

        // Look-alike code points are left alone
        assert(chomp(to!Str("hello\u2029\u2129")) == "hello\u2029\u2129");
        assert(chomp(to!Str("hello\u2029\u0185")) == "hello\u2029\u0185");

        foreach (Delim; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // @@@ BUG IN COMPILER, MUST INSERT CAST
            assert(chomp(cast(Str) null, cast(Delim) null) is null);
            assert(chomp(to!Str("hello\n"), cast(Delim) null) == "hello");
            // @@@ BUG IN COMPILER, MUST INSERT CAST
            assert(chomp(to!Str("hello"), cast(Delim) null) == "hello");

            assert(chomp(to!Str("hello"), to!Delim("o")) == "hell");
            assert(chomp(to!Str("hello"), to!Delim("p")) == "hello");
            assert(chomp(to!Str("hello"), to!Delim("llo")) == "he");
            assert(chomp(to!Str("\uFF28ello"), to!Delim("llo")) == "\uFF28e");
            assert(chomp(to!Str("\uFF28el\uFF4co"), to!Delim("l\uFF4co")) == "\uFF28e");
        }();
    }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;

    assert(chomp("hello world\r\n"d.byDchar).array == "hello world"d);
    assert(chomp("hello world\r\n"w.byWchar).array == "hello world"w);
    assert(chomp("hello world\r\n" .byChar ).array == "hello world");

    assert(chomp("hello world"d.byDchar, "ld").array == "hello wor"d);

    assert(chomp("hello\u2020"d.byDchar, "\u2020"d).array == "hello"d);
    assert(chomp("hello\u2020" .byChar , "\u2020").array == "hello");
}
3361
3362
/++
    Strips $(D delimiter) from the front of $(D str).

    If $(D str) starts with $(D delimiter), the part of $(D str) following
    $(D delimiter) is returned. If $(D str) does $(I not) start with
    $(D delimiter), then it is returned unchanged.

    Params:
        str = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        delimiter = string of characters to be sliced off front of str[]

    Returns:
        slice of str
 +/
Range chompPrefix(Range, C2)(Range str, const(C2)[] delimiter)
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    alias StrUnit = ElementEncodingType!Range;

    static if (is(Unqual!StrUnit == Unqual!C2) &&
               (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4)))
    {
        // Identical code unit type and sliceable: compare in bulk, then slice.
        import std.algorithm.searching : startsWith;
        if (!str.startsWith(delimiter))
            return str;
        return str[delimiter.length .. $];
    }
    else
    {
        // Element type used to walk the delimiter: it has to match what
        // `str.front` yields.
        static if (isSomeString!Range)
            alias Elem = dchar;     // strings auto-decode
        else
            alias Elem = StrUnit;   // other ranges do not

        // Kept so that a partial match can be undone.
        auto untouched = str.save;

        foreach (Elem unit; delimiter)
        {
            if (str.empty)
                return untouched;
            if (str.front != unit)
                return untouched;

            str.popFront();
        }

        return str;
    }
}
3412
///
@safe pure unittest
{
    // Empty input stays empty
    assert(chompPrefix("", "hello") == "");
    // The delimiter has to sit at the very front to be removed
    assert(chompPrefix("hello world", " world") == "hello world");
    assert(chompPrefix("hello world", "he") == "llo world");
    assert(chompPrefix("hello world", "hello w") == "orld");
}
3421
// Overload for types that implicitly convert to a string: forward to the
// string implementation, instantiated explicitly with the converted type.
StringTypeOf!Range chompPrefix(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return chompPrefix!(Str, C2)(str, delimiter);
}
3427
@safe pure
unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;
    assertCTFEable!(
    {
    foreach (Str; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        foreach (Delim; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Delimiter longer than the input never matches
            assert(equal(chompPrefix(to!Str("abcde"), to!Delim("abcdefgh")), "abcde"));
            assert(equal(chompPrefix(to!Str("\uFF28el"), to!Delim("\uFF28el\uFF4co")), "\uFF28el"));

            // Proper prefixes and the full string
            assert(equal(chompPrefix(to!Str("abcdefgh"), to!Delim("abcde")), "fgh"));
            assert(equal(chompPrefix(to!Str("\uFF28el\uFF4co"), to!Delim("\uFF28el")), "\uFF4co"));
            assert(equal(chompPrefix(to!Str("\uFF28el\uFF4co"), to!Delim("\uFF28el\uFF4co")), ""));
        }();
    }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;

    // Every range width against a delimiter of a different width
    assert(chompPrefix("hello world"c.byDchar, "hello"w).array == " world"d);
    assert(chompPrefix("hello world"d.byDchar, "hello"w).array == " world"d);
    assert(chompPrefix("hello world"w.byWchar, "hello" ).array == " world"w);
    assert(chompPrefix("hello world" .byChar , "hello"d).array == " world");

    // No match: the range comes back unchanged
    assert(chompPrefix("hello world"d.byDchar, "hello world xx").array == "hello world"d);
    assert(chompPrefix("hello world"d.byDchar, "lx").array == "hello world"d);

    // Non-ASCII delimiter
    assert(chompPrefix("\u2020world"d.byDchar, "\u2020"d).array == "world"d);
    assert(chompPrefix("\u2020world" .byChar , "\u2020").array == "world");
}
3463
@safe pure unittest
{
    // Exercise chompPrefix through the testAliasedString helper.
    assert(testAliasedString!chompPrefix("hello world", "hello"));
}
3468
/++
    Returns $(D str) without its last character, if there is one. If $(D str)
    ends with $(D "\r\n"), then both are removed. If $(D str) is empty, then
    it is returned unchanged.

    Params:
        str = string or bidirectional range of characters (must be valid UTF)
    Returns:
        slice of str
 +/

Range chop(Range)(Range str)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    if (str.empty)
        return str;

    static if (isSomeString!Range)
    {
        if (str.length >= 2 && str[$ - 1] == '\n' && str[$ - 2] == '\r')
            return str[0 .. $ - 2];
        str.popBack();
        return str;
    }
    else
    {
        alias C = Unqual!(ElementEncodingType!Range);
        C c = str.back;
        str.popBack();
        if (c == '\n')
        {
            // "\r\n" is removed as a unit
            if (!str.empty && str.back == '\r')
                str.popBack();
            return str;
        }
        // Pop back a dchar, not just a code unit
        static if (C.sizeof == 1)
        {
            // Walk back over UTF-8 continuation bytes (10xxxxxx) until the
            // lead byte has been removed; give up after 4 units so invalid
            // input cannot eat the whole range.
            int cnt = 1;
            while ((c & 0xC0) == 0x80)
            {
                if (str.empty)
                    break;
                c = str.back;
                str.popBack();
                if (++cnt > 4)
                    break;
            }
        }
        else static if (C.sizeof == 2)
        {
            // In UTF-16 the unit at the back of a surrogate pair is the low
            // (trail) surrogate 0xDC00 .. 0xDFFF. If that is what was just
            // removed, also remove the high (lead) surrogate 0xD800 .. 0xDBFF
            // in front of it so no half pair is left behind.
            if (c >= 0xDC00 && c <= 0xDFFF && !str.empty)
            {
                immutable C lead = str.back;
                if (lead >= 0xD800 && lead <= 0xDBFF)
                    str.popBack();
            }
        }
        else static if (C.sizeof == 4)
        {
            // one code unit is one code point; nothing more to remove
        }
        else
            static assert(0);
        return str;
    }
}
3536
///
@safe pure unittest
{
    // Empty input is returned as is
    assert(chop("") == "");

    // Otherwise the last character goes, whatever it is
    assert(chop("Walter Bright") == "Walter Brigh");
    assert(chop("hello world") == "hello worl");
    assert(chop("hello world\n") == "hello world");
    assert(chop("hello world\r") == "hello world");
    assert(chop("hello world\n\r") == "hello world\n");

    // "\r\n" is removed as a unit
    assert(chop("hello world\r\n") == "hello world");
}
3548
// Overload for types that implicitly convert to a string: forward to the
// string implementation, instantiated explicitly with the converted type.
StringTypeOf!Range chop(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return chop!Str(str);
}

@safe pure unittest
{
    // Exercise chop through the testAliasedString helper.
    assert(testAliasedString!chop("hello world"));
}
3559
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar, byCodeUnit, invalidUTFstrings;

    // Ranges of each code unit width
    assert(chop("".byChar).array == "");
    assert(chop("hello world".byChar).array == "hello worl");
    assert(chop("hello world\n\r".byChar).array == "hello world\n");
    assert(chop("hello world\n"w.byWchar).array == "hello world"w);
    assert(chop("hello world\r\n"w.byWchar).array == "hello world"w);
    assert(chop("hello world\r"d.byDchar).array == "hello world"d);
    assert(chop("Walter Bright"d.byDchar).array == "Walter Brigh"d);

    // A whole multi-unit character is removed, not just one code unit
    assert(chop(`ミツバチと科学者` .byCodeUnit).array == "ミツバチと科学");
    assert(chop(`ミツバチと科学者`w.byCodeUnit).array == "ミツバチと科学"w);
    assert(chop(`ミツバチと科学者`d.byCodeUnit).array == "ミツバチと科学"d);

    // Invalid UTF: only walk the result, nothing is asserted about it
    foreach (bad; invalidUTFstrings!char())
    {
        foreach (unit; chop(bad.byCodeUnit))
        {
        }
    }

    foreach (bad; invalidUTFstrings!wchar())
    {
        foreach (unit; chop(bad.byCodeUnit))
        {
        }
    }
}
3593
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (Str; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        assert(chop(cast(Str) null) is null);

        // ASCII, with and without line terminators
        assert(equal(chop(to!Str("hello")), "hell"));
        assert(equal(chop(to!Str("hello\n\r")), "hello\n"));
        assert(equal(chop(to!Str("hello\r\n")), "hello"));

        // Multi-unit characters at the end
        assert(equal(chop(to!Str("Verité")), "Verit"));
        assert(equal(chop(to!Str(`ミツバチと科学者`)), "ミツバチと科学"));
        assert(equal(chop(to!Str(`さいごの果実`)), "さいごの果"));
    }
    });
}
3614
3615
/++
    Left justify $(D s) in a field $(D width) characters wide, padding on the
    right with $(D fillChar) where $(D s) does not fill the field.

    Params:
        s = string
        width = minimum field width
        fillChar = used to pad end up to $(D width) characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF leftJustifier), which does not allocate
 +/
S leftJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;
    // Materialize the lazy range produced by leftJustifier.
    return array(leftJustifier(s, width, fillChar));
}
3638
///
@safe pure unittest
{
    // A field narrower than the string leaves it untouched
    assert(leftJustify("hello", 2, 'X') == "hello");
    // Otherwise the fill character is appended up to the width
    assert(leftJustify("hello", 7, 'X') == "helloXX");
    assert(leftJustify("hello", 9, 'X') == "helloXXXX");
}
3646
/++
    Left justify $(D r) in a field $(D width) characters wide, padding on the
    right with $(D fillChar) where $(D r) does not fill the field.

    Params:
        r = string or range of characters
        width = minimum field width
        fillChar = used to pad end up to $(D width) characters

    Returns:
        a lazy range of the left justified result

    See_Also:
        $(LREF rightJustifier)
 +/

auto leftJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 4)
    {
        static struct Result
        {
          private:
            Range _input;
            size_t _width;
            dchar _fillChar;
            size_t len;     // number of elements popped so far

          public:

            @property bool empty()
            {
                // Done only once the field is full and the input is used up.
                return !(len < _width) && _input.empty;
            }

            @property C front()
            {
                // After the input runs out, the fill character takes over.
                if (_input.empty)
                    return _fillChar;
                return _input.front;
            }

            void popFront()
            {
                ++len;
                if (_input.empty)
                    return;
                _input.popFront();
            }

            static if (isForwardRange!Range)
            {
                @property typeof(this) save() return scope
                {
                    auto copy = this;
                    copy._input = _input.save;
                    return copy;
                }
            }
        }

        return Result(r, width, fillChar);
    }
    else static if (C.sizeof == 2)
    {
        // Justify on code points, then encode back to UTF-16.
        import std.utf : byDchar, byWchar;
        return leftJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 1)
    {
        // Justify on code points, then encode back to UTF-8.
        import std.utf : byDchar, byChar;
        return leftJustifier(r.byDchar, width, fillChar).byChar;
    }
    else
        static assert(0);
}
3725
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    assert(leftJustifier("hello", 2).equal("hello".byChar));
    // width 7 leaves two positions to fill after the five letters
    assert(leftJustifier("hello", 7).equal("hello  ".byChar));
    assert(leftJustifier("hello", 7, 'x').equal("helloxx".byChar));
}
3736
// Overload for types that implicitly convert to a string: forward to the
// string implementation, instantiated explicitly with the converted type.
auto leftJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return leftJustifier!Str(r, width, fillChar);
}
3742
@safe pure unittest
{
    // A saved copy must not advance together with the original.
    auto live = "hello".leftJustifier(8);
    live.popFront();
    auto snapshot = live.save;
    live.popFront();
    assert(live.front == 'l');
    assert(snapshot.front == 'e');
}

@safe pure unittest
{
    // Exercise leftJustifier through the testAliasedString helper.
    assert(testAliasedString!leftJustifier("hello", 2));
}
3757
/++
    Right justify $(D s) in a field $(D width) characters wide, padding on the
    left with $(D fillChar) where $(D s) does not fill the field.

    Params:
        s = string
        width = minimum field width
        fillChar = used to pad the front up to $(D width) characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF rightJustifier), which does not allocate
 +/
S rightJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;
    // Materialize the lazy range produced by rightJustifier.
    return array(rightJustifier(s, width, fillChar));
}
3780
///
@safe pure unittest
{
    // A field narrower than the string leaves it untouched
    assert(rightJustify("hello", 2, 'X') == "hello");
    // Otherwise the fill character is prepended up to the width
    assert(rightJustify("hello", 7, 'X') == "XXhello");
    assert(rightJustify("hello", 9, 'X') == "XXXXhello");
}
3788
/++
    Right justify $(D r) in a field $(D width) characters wide, padding on the
    left with $(D fillChar) where $(D r) does not fill the field.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad the front up to $(D width) characters

    Returns:
        a lazy range of the right justified result

    See_Also:
        $(LREF leftJustifier)
 +/

auto rightJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 4)
    {
        static struct Result
        {
          private:
            Range _input;
            size_t _width;
            alias nfill = _width;       // number of fill characters to prepend
            dchar _fillChar;
            bool inited;

            // Lazy initialization so constructor is trivial and cannot fail
            void initialize()
            {
                // Replace _width with nfill
                // (use alias instead of union because CTFE cannot deal with unions)
                assert(_width);
                static if (hasLength!Range)
                {
                    immutable len = _input.length;
                    if (_width > len)
                        nfill = _width - len;
                    else
                        nfill = 0;
                }
                else
                {
                    // Lookahead to see how many fill characters are needed
                    import std.range : take;
                    import std.range.primitives : walkLength;
                    nfill = _width - walkLength(_input.save.take(_width), _width);
                }
                inited = true;
            }

          public:
            this(Range input, size_t width, dchar fillChar) pure nothrow
            {
                _input = input;
                _fillChar = fillChar;
                _width = width;
            }

            @property bool empty()
            {
                return !nfill && _input.empty;
            }

            @property C front()
            {
                // A nonzero nfill may still be the raw width: resolve it
                // first, then check it again.
                if (nfill)
                {
                    if (!inited)
                        initialize();
                    if (nfill)
                        return _fillChar;
                }
                return _input.front;
            }

            void popFront()
            {
                // Same two-step check as in front().
                if (nfill && !inited)
                    initialize();

                if (nfill)
                    --nfill;
                else
                    _input.popFront();
            }

            @property typeof(this) save()
            {
                auto copy = this;
                copy._input = _input.save;
                return copy;
            }
        }

        return Result(r, width, fillChar);
    }
    else static if (C.sizeof == 2)
    {
        // Justify on code points, then encode back to UTF-16.
        import std.utf : byDchar, byWchar;
        return rightJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 1)
    {
        // Justify on code points, then encode back to UTF-8.
        import std.utf : byDchar, byChar;
        return rightJustifier(r.byDchar, width, fillChar).byChar;
    }
    else
        static assert(0);
}
3905
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    assert(rightJustifier("hello", 2).equal("hello".byChar));
    // width 7 puts two fill characters in front of the five letters
    assert(rightJustifier("hello", 7).equal("  hello".byChar));
    assert(rightJustifier("hello", 7, 'x').equal("xxhello".byChar));
}
3916
// Overload for types that implicitly convert to a string: forward to the
// string implementation, instantiated explicitly with the converted type.
auto rightJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return rightJustifier!Str(r, width, fillChar);
}

@safe pure unittest
{
    // Exercise rightJustifier through the testAliasedString helper.
    assert(testAliasedString!rightJustifier("hello", 2));
}
3927
@safe pure unittest
{
    // A saved copy must not advance together with the original.
    auto live = "hello"d.rightJustifier(6);
    live.popFront();
    auto snapshot = live.save;
    live.popFront();
    assert(live.front == 'e');
    assert(snapshot.front == 'h');

    // Two fill characters come before the text.
    auto padded = "hello".rightJustifier(7);
    padded.popFront();
    assert(padded.front == ' ');
    padded.popFront();
    assert(padded.front == 'h');

    // Width equal to the length: popFront is called without ever calling front.
    auto exact = "hello"d.rightJustifier(5);
    foreach (_; 0 .. 3)
        exact.popFront();
}
3948
/++
    Center $(D s) in a field $(D width) characters wide, padding both sides
    with $(D fillChar) where $(D s) does not fill the field.

    Params:
        s = The string to center
        width = Width of the field to center `s` in
        fillChar = The character to use for filling excess space in the field

    Returns:
        The resulting _center-justified string. The returned string is
        GC-allocated. To avoid GC allocation, use $(LREF centerJustifier)
        instead.
 +/
S center(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;
    // Materialize the lazy range produced by centerJustifier.
    return array(centerJustifier(s, width, fillChar));
}
3970
///
@safe pure unittest
{
    // A field narrower than the string leaves it untouched
    assert(center("hello", 2, 'X') == "hello");
    // Otherwise the fill character goes on both sides
    assert(center("hello", 7, 'X') == "XhelloX");
    assert(center("hello", 9, 'X') == "XXhelloXX");
}
3978
@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        S s = to!S("hello");

        // Field narrower than the string: nothing is added
        assert(leftJustify(s, 2) == "hello");
        assert(rightJustify(s, 2) == "hello");
        assert(center(s, 2) == "hello");

        // Width 7: two fill characters in total
        assert(leftJustify(s, 7) == "hello  ");
        assert(rightJustify(s, 7) == "  hello");
        assert(center(s, 7) == " hello ");

        // Width 8: three fill characters; center puts the odd one on the right
        assert(leftJustify(s, 8) == "hello   ");
        assert(rightJustify(s, 8) == "   hello");
        assert(center(s, 8) == " hello  ");

        assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100");
        assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
        assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100");

        assert(leftJustify(s, 8, 'ö') == "helloööö");
        assert(rightJustify(s, 8, 'ö') == "öööhello");
        assert(center(s, 8, 'ö') == "öhelloöö");
    }
    });
}
4013
/++
    Center justify $(D r) in a field $(D width) characters wide, padding both
    sides with $(D fillChar) where $(D r) does not fill the field. When the
    padding cannot be split evenly, the extra fill character goes on the right.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad both sides up to $(D width) characters

    Returns:
        a lazy range of the center justified result

    See_Also:
        $(LREF leftJustifier)
        $(LREF rightJustifier)
 +/

auto centerJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 4)
    {
        import std.range : chain, repeat;
        import std.range.primitives : walkLength;

        // Count at most `width` elements on a saved copy; the result is
        // clamped in case walkLength reports more than that.
        auto seen = walkLength(r.save, width);
        if (seen > width)
            seen = width;

        const slack = width - seen;
        const nleft = slack / 2;
        const nright = slack - nleft;
        return chain(repeat(fillChar, nleft), r, repeat(fillChar, nright));
    }
    else static if (C.sizeof == 2)
    {
        // Justify on code points, then encode back to UTF-16.
        import std.utf : byDchar, byWchar;
        return centerJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 1)
    {
        // Justify on code points, then encode back to UTF-8.
        import std.utf : byDchar, byChar;
        return centerJustifier(r.byDchar, width, fillChar).byChar;
    }
    else
        static assert(0);
}
4064
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    assert(centerJustifier("hello", 2).equal("hello".byChar));
    // width 8 leaves three to fill: one on the left, two on the right
    assert(centerJustifier("hello", 8).equal(" hello  ".byChar));
    assert(centerJustifier("hello", 7, 'x').equal("xhellox".byChar));
}
4075
// Overload for types that implicitly convert to a string: forward to the
// string implementation, instantiated explicitly with the converted type.
auto centerJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return centerJustifier!Str(r, width, fillChar);
}

@safe pure unittest
{
    // Exercise centerJustifier through the testAliasedString helper.
    assert(testAliasedString!centerJustifier("hello", 8));
}
4086
@system unittest
{
    // Minimal forward range over a dstring, without length or slicing.
    static auto byFwdRange(dstring s)
    {
        static struct FRange
        {
            dstring str;
            this(dstring s) { str = s; }
            @property bool empty() { return str.length == 0; }
            @property dchar front() { return str[0]; }
            void popFront() { str = str[1 .. $]; }
            @property FRange save() { return this; }
        }
        return FRange(s);
    }

    // A saved copy must not advance together with the original.
    auto live = centerJustifier(byFwdRange("hello"d), 6);
    live.popFront();
    auto snapshot = live.save;
    live.popFront();
    assert(live.front == 'l');
    assert(snapshot.front == 'e');

    // One fill character comes before the text.
    auto padded = "hello".centerJustifier(7);
    padded.popFront();
    assert(padded.front == 'h');
    padded.popFront();
    assert(padded.front == 'e');

    // Six elements in total; pop all of them.
    auto drained = byFwdRange("hello"d).centerJustifier(6);
    foreach (_; 0 .. 6)
        drained.popFront();
}
4124
4125
/++
    Replace each tab character in $(D s) with the number of spaces necessary
    to align the following character at the next tab stop.

    Params:
        s = string
        tabSize = distance between tab stops

    Returns:
        GC allocated string with tabs replaced with spaces
 +/
auto detab(Range)(auto ref Range s, size_t tabSize = 8) pure
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
    || __traits(compiles, StringTypeOf!Range))
{
    import std.array : array;
    // Materialize the lazy range produced by detabber.
    return array(detabber(s, tabSize));
}
4144
///
@system pure unittest
{
    // The tab sits at column 0 of the second line, so with a tab size of 9
    // it expands to nine spaces.
    assert(detab(" \n\tx", 9) == " \n         x");
}
4150
@safe pure unittest
{
    // String wrappers that convert through alias this
    static struct TestStruct
    {
        string s;
        alias s this;
    }

    // Same, but not copyable
    static struct TestStruct2
    {
        string s;
        alias s this;
        @disable this(this);
    }

    string s = " \n\tx";
    // The tab at column 0 with a tab size of 9 becomes nine spaces.
    string cmp = " \n         x";
    auto t = TestStruct(s);
    assert(detab(t, 9) == cmp);
    assert(detab(TestStruct(s), 9) == cmp);
    assert(detab(TestStruct(s), 9) == detab(TestStruct(s), 9));
    assert(detab(TestStruct2(s), 9) == detab(TestStruct2(s), 9));
    assert(detab(TestStruct2(s), 9) == cmp);
}
4175
/++
    Replace each tab character in $(D r) with the number of spaces
    necessary to align the following character at the next tab stop.
    The column count restarts after $(D '\r'), $(D '\n') and the Unicode
    line, paragraph and next-line separators.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops

    Returns:
        lazy forward range with tabs replaced with spaces
 +/
auto detabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0);

    alias C = Unqual!(ElementEncodingType!(Range));

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t nspaces;     // spaces still to emit for the tab at the front
        int column;         // current column, reset at every line break
        size_t index;       // code units of the current character not yet popped

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        static if (isInfinite!(Range))
        {
            enum bool empty = false;
        }
        else
        {
            @property bool empty()
            {
                if (!_input.empty)
                    return false;
                return nspaces == 0;
            }
        }

        @property C front()
        {
            // In the middle of an expanded tab
            if (nspaces)
                return ' ';

            // Look at the code unit directly; indexing avoids autodecoding
            static if (isSomeString!(Range))
                C unit = _input[0];
            else
                C unit = _input.front;

            // The current character has already been accounted for
            if (index)
                return unit;

            dchar dc;
            if (unit < codeUnitLimit!(immutable(C)[]))
            {
                dc = unit;
                index = 1;
            }
            else
            {
                auto lookahead = _input.save;
                dc = decodeFront(lookahead, index);     // lookahead to decode
            }

            if (dc == '\t')
            {
                nspaces = _tabSize - (column % _tabSize);
                column += nspaces;
                unit = ' ';
            }
            else if (dc == '\r' || dc == '\n' ||
                     dc == paraSep || dc == lineSep || dc == nelSep)
            {
                column = 0;
            }
            else
            {
                ++column;
            }
            return unit;
        }

        void popFront()
        {
            // Make sure the front character has been accounted for
            if (!index)
                front;
            if (nspaces)
                --nspaces;
            // Move on in the input only once any tab expansion is finished
            if (!nspaces)
            {
                static if (isSomeString!(Range))
                    _input = _input[1 .. $];
                else
                    _input.popFront();
                --index;
            }
        }

        @property typeof(this) save()
        {
            auto copy = this;
            copy._input = _input.save;
            return copy;
        }
    }

    return Result(r, tabSize);
}
4297
///
@system pure unittest
{
    import std.array : array;

    // The tab sits at column 0 of the second line, so with a tab size of 9
    // it expands to nine spaces.
    assert(detabber(" \n\tx", 9).array == " \n         x");
}
4305
// Overload for types that implicitly convert to a string: forward to the
// string implementation, instantiated explicitly with the converted type.
auto detabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return detabber!Str(r, tabSize);
}

@safe pure unittest
{
    // Exercise detabber through the testAliasedString helper.
    assert(testAliasedString!detabber( " ab\t asdf ", 8));
}
4316
@system pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        // Tabs at columns 5, 10 and 24 advance to columns 8, 16 and 32.
        S s = to!S("This \tis\t a fofof\tof list");
        assert(cmp(detab(s), "This    is       a fofof        of list") == 0);

        assert(detab(cast(S) null) is null);
        assert(detab("").empty);
        assert(detab("a") == "a");

        // A tab at column 0 expands to a full tab width
        assert(detab("\t") == "        ");
        assert(detab("\t", 3) == "   ");
        assert(detab("\t", 9) == "         ");

        // A tab at column 3 expands to five spaces (tab size 8)
        assert(detab( " ab\t asdf ") == " ab      asdf ");
        assert(detab( " \U00010000b\tasdf ") == " \U00010000b     asdf ");

        // Every kind of line break resets the column
        assert(detab("\r\t", 9) == "\r         ");
        assert(detab("\n\t", 9) == "\n         ");
        assert(detab("\u0085\t", 9) == "\u0085         ");
        assert(detab("\u2028\t", 9) == "\u2028         ");
        assert(detab(" \u2029\t", 9) == " \u2029         ");
    }
    });
}
4346
///
@system pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar;

    // The paragraph separator resets the column, so the tab expands to a
    // full nine spaces.
    assert(detabber(" \u2029\t".byChar, 9).array == " \u2029         ");

    // A saved copy must not advance together with the original.
    auto r = "hel\tx".byWchar.detabber();
    assert(r.front == 'h');
    auto s = r.save;
    r.popFront();
    r.popFront();
    assert(r.front == 'l');
    assert(s.front == 'h');
}
4362
/++
    Replaces spaces in $(D s) with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.

    Params:
        s       = String to convert.
        tabSize = Tab columns are $(D tabSize) spaces apart.

    Returns:
        GC allocated string with spaces replaced with tabs;
        use $(LREF entabber) to not allocate.

    See_Also:
        $(LREF entabber)
 +/
auto entab(Range)(Range s, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
{
    import std.array : array;
    // Materialize the lazy range produced by entabber.
    return array(entabber(s, tabSize));
}
4384
///
@safe pure unittest
{
    // Eight leading spaces reach the first tab stop and become one tab;
    // the space before the newline is trailing and is dropped.
    assert(entab("        x \n") == "\tx\n");
}
4390
// Overload for types that are not character ranges themselves but have a
// string type: forward to the range overload, instantiated explicitly with
// that string type.
auto entab(Range)(auto ref Range s, size_t tabSize = 8)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) &&
    is(StringTypeOf!Range))
{
    alias Str = StringTypeOf!Range;
    return entab!Str(s, tabSize);
}

@safe pure unittest
{
    // Exercise entab through the testAliasedString helper.
    assert(testAliasedString!entab(" x \n"));
}
4402
/++
    Replaces spaces in range $(D r) with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops; must be greater than zero
                  (checked with an $(D assert))

    Returns:
        lazy forward range with spaces replaced with tabs

    See_Also:
        $(LREF entab)
  +/
auto entabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0);
    alias C = Unqual!(ElementEncodingType!Range);

    // Lazy range that yields the entabbed code units one at a time.
    static struct Result
    {
    private:
        Range _input;       // remaining, not yet emitted input
        size_t _tabSize;    // distance between tab stops
        size_t nspaces;     // pending spaces still to be emitted
        size_t ntabs;       // pending tabs still to be emitted (before the spaces)
        int column;         // current output column, reset to 0 at line breaks
        size_t index;       // code units of the current character not yet popped;
                            // 0 means the next character has not been examined

        // First code unit of the remaining input, without auto-decoding.
        @property C getFront()
        {
            static if (isSomeString!Range)
                return _input[0];       // avoid autodecode
            else
                return _input.front;
        }

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        // True when nothing is pending and the remaining input is empty or
        // consists solely of spaces and tabs.
        @property bool empty()
        {
            if (ntabs || nspaces)
                return false;

            /* Since trailing spaces are removed,
             * look ahead for anything that is not a trailing space
             */
            static if (isSomeString!Range)
            {
                foreach (c; _input)
                {
                    if (c != ' ' && c != '\t')
                        return false;
                }
                return true;
            }
            else
            {
                if (_input.empty)
                    return true;
                immutable c = _input.front;
                if (c != ' ' && c != '\t')
                    return false;
                // Scan a saved copy so the real input is not consumed.
                auto t = _input.save;
                t.popFront();
                foreach (c2; t)
                {
                    if (c2 != ' ' && c2 != '\t')
                        return false;
                }
                return true;
            }
        }

        // Returns the next output code unit. The first call for a new
        // character has side effects: it updates `column` and `index`, and
        // for a run of blanks it consumes the run from `_input` and sets
        // `ntabs`/`nspaces`.
        @property C front()
        {
            //writefln("   front(): ntabs = %s nspaces = %s index = %s front = '%s'", ntabs, nspaces, index, getFront);
            // Pending whitespace is emitted first: tabs, then spaces.
            if (ntabs)
                return '\t';
            if (nspaces)
                return ' ';
            C c = getFront;
            // index != 0: this character was already examined; just return its code unit.
            if (index)
                return c;
            dchar dc;
            if (c < codeUnitLimit!(immutable(C)[]))
            {
                // Code unit below the limit: no decoding needed.
                index = 1;
                dc = c;
                if (c == ' ' || c == '\t')
                {
                    // Consume input until a non-blank is encountered
                    immutable startcol = column;
                    C cx;
                    static if (isSomeString!Range)
                    {
                        while (1)
                        {
                            assert(_input.length);
                            cx = _input[0];
                            if (cx == ' ')
                                ++column;
                            else if (cx == '\t')
                                column += _tabSize - (column % _tabSize);
                            else
                                break;
                            _input = _input[1 .. $];
                        }
                    }
                    else
                    {
                        while (1)
                        {
                            assert(!_input.empty);
                            cx = _input.front;
                            if (cx == ' ')
                                ++column;
                            else if (cx == '\t')
                                column += _tabSize - (column % _tabSize);
                            else
                                break;
                            _input.popFront();
                        }
                    }
                    // Compute ntabs+nspaces to get from startcol to column
                    immutable n = column - startcol;
                    if (n == 1)
                    {
                        // A one-column gap is always emitted as a single space.
                        nspaces = 1;
                    }
                    else
                    {
                        ntabs = column / _tabSize - startcol / _tabSize;
                        if (ntabs == 0)
                            nspaces = column - startcol;
                        else
                            nspaces = column % _tabSize;
                    }
                    //writefln("\tstartcol = %s, column = %s, _tabSize = %s", startcol, column, _tabSize);
                    //writefln("\tntabs = %s, nspaces = %s", ntabs, nspaces);
                    // Classify the non-blank character that ended the run.
                    if (cx < codeUnitLimit!(immutable(C)[]))
                    {
                        dc = cx;
                        index = 1;
                    }
                    else
                    {
                        auto r = _input.save;
                        dc = decodeFront(r, index);     // lookahead to decode
                    }
                    switch (dc)
                    {
                        case '\r':
                        case '\n':
                        case paraSep:
                        case lineSep:
                        case nelSep:
                            column = 0;
                            // Spaces followed by newline are ignored
                            ntabs = 0;
                            nspaces = 0;
                            return cx;

                        default:
                            ++column;
                            break;
                    }
                    return ntabs ? '\t' : ' ';
                }
            }
            else
            {
                // Multi-unit character: decode a saved copy; decodeFront
                // stores the number of code units consumed in `index`.
                auto r = _input.save;
                dc = decodeFront(r, index);     // lookahead to decode
            }
            //writefln("dc = x%x", dc);
            switch (dc)
            {
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    column = 0;
                    break;

                default:
                    ++column;
                    break;
            }
            return c;
        }

        // Drops one output code unit: a pending tab, else a pending space,
        // else one code unit of the input.
        void popFront()
        {
            //writefln("popFront(): ntabs = %s nspaces = %s index = %s front = '%s'", ntabs, nspaces, index, getFront);
            // Make sure the current character has been examined by front.
            if (!index)
                front;
            if (ntabs)
                --ntabs;
            else if (nspaces)
                --nspaces;
            else if (!ntabs && !nspaces)
            {
                static if (isSomeString!Range)
                    _input = _input[1 .. $];
                else
                    _input.popFront();
                --index;
            }
        }

        // Copies the state and saves the underlying input.
        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }

    return Result(r, tabSize);
}
4635
///
@safe pure unittest
{
    import std.array : array;

    // Eight leading spaces reach the first tab stop (the default tabSize is 8),
    // so they are emitted as a single tab; the blank before the newline is
    // trailing whitespace and is dropped.
    assert(entabber("        x \n").array == "\tx\n");
}
4642
auto entabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    // Forward string-convertible arguments to the overload instantiated
    // with their string type.
    alias Str = StringTypeOf!Range;
    return entabber!Str(r, tabSize);
}
4648
@safe pure unittest
{
    // entabber must accept the argument form used by testAliasedString.
    immutable aliasedMatches = testAliasedString!entabber(" ab asdf ", 8);
    assert(aliasedMatches);
}
4653
// Exercises entab at run time and at compile time (via assertCTFEable):
// empty/null input, runs of blanks, trailing whitespace and the line
// separators recognised by entabber.
//
// NOTE(review): several literals below appear to have had runs of spaces
// collapsed to a single space (for example the nine "1234567 \ta" inputs are
// now identical yet expect different results). Restore the literals from the
// upstream file before relying on these assertions.
@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    assert(entab(cast(string) null) is null);
    assert(entab("").empty);
    assert(entab("a") == "a");
    assert(entab(" ") == "");
    assert(entab(" x") == "\tx");
    assert(entab(" ab asdf ") == " ab\tasdf");
    assert(entab(" ab asdf ") == " ab\t asdf");
    assert(entab(" ab \t asdf ") == " ab\t asdf");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567 \ta") == "1234567\t\t\ta");

    // Trailing blanks are removed; other whitespace and line breaks are kept.
    assert(entab("a ") == "a");
    assert(entab("a\v") == "a\v");
    assert(entab("a\f") == "a\f");
    assert(entab("a\n") == "a\n");
    assert(entab("a\n\r") == "a\n\r");
    assert(entab("a\r\n") == "a\r\n");
    assert(entab("a\u2028") == "a\u2028");
    assert(entab("a\u2029") == "a\u2029");
    assert(entab("a\u0085") == "a\u0085");
    assert(entab("a ") == "a");
    assert(entab("a\t") == "a");
    assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") ==
                 "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F");
    // Blanks directly before a line break are dropped.
    assert(entab(" \naa") == "\naa");
    assert(entab(" \r aa") == "\r aa");
    assert(entab(" \u2028 aa") == "\u2028 aa");
    assert(entab(" \u2029 aa") == "\u2029 aa");
    assert(entab(" \u0085 aa") == "\u0085 aa");
    });
}
4700
@safe pure
unittest
{
    import std.array : array;
    import std.utf : byChar;

    // byChar input is not a string, so the generic forward-range code paths
    // of entabber are exercised.
    auto nelInput = " \u0085 aa".byChar;
    assert(entabber(nelInput).array == "\u0085 aa");
    auto lineSepInput = " \u2028\t aa \t".byChar;
    assert(entabber(lineSepInput).array == "\u2028\t aa");

    // A saved copy must advance independently of the range it came from.
    auto walker = entabber("1234", 4);
    walker.popFront();
    auto checkpoint = walker.save;
    walker.popFront();
    assert(walker.front == '3');
    assert(checkpoint.front == '2');
}
4716
4717
/++
    Returns a copy of $(D str) in which every character that is a key of
    $(D transTable) is replaced by the value stored for it. $(D transTable)
    is an AA keyed by $(D dchar) whose values are either $(D dchar) or some
    type of string. Characters listed in $(D toRemove), if any, are dropped
    from the result and are never translated. $(D str) itself is left
    untouched.

    See_Also:
        $(LREF tr)
        $(REF replace, std,array)

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
  +/
C1[] translate(C1, C2 = immutable char)(C1[] str,
                                        in dchar[dchar] transTable,
                                        const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeChar!C2)
{
    import std.array : appender;

    // Collect the translated text in an appender and hand back its contents.
    auto sink = appender!(C1[])();
    str.translateImpl(transTable, toRemove, sink);
    return sink.data;
}
4746
///
@safe pure unittest
{
    // Character-to-character table; a translated '5' is not translated again.
    dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    assert("hello world".translate(charMap) == "h5ll7 w7rld");

    // Characters listed in the third argument are dropped.
    assert("hello world".translate(charMap, "low") == "h5 rd");

    // Character-to-string table.
    string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    assert("hello world".translate(stringMap) == "h5llorange worangerld");
}
4758
@safe pure unittest // issue 13018
{
    // Same checks as the documented example, but with immutable tables.
    immutable dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    assert("hello world".translate(charMap) == "h5ll7 w7rld");

    assert("hello world".translate(charMap, "low") == "h5 rd");

    immutable string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    assert("hello world".translate(stringMap) == "h5llorange worangerld");
}
4769
// Exercises the dchar[dchar] overload of translate for every combination of
// char/wchar/dchar width and mutable/const/immutable qualifier of the input
// string (S), of the toRemove string (T) and of the table qualifier (R),
// both at run time and at compile time (via assertCTFEable).
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {
        assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) ==
               to!S("qe55o wor5d"));
        assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) ==
               to!S("he\U00010143\U00010143l wlr\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) ==
               to!S("qe55o \U00010143 wor5d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) ==
               to!S("hell0 o w0rld"));
        // A null table leaves the text unchanged.
        assert(translate(to!S("hello world"), cast(dchar[dchar]) null) == to!S("hello world"));

        foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            foreach (R; AliasSeq!(dchar[dchar], const dchar[dchar],
                        immutable dchar[dchar]))
            {
                R tt = ['h' : 'q', 'l' : '5'];
                assert(translate(to!S("hello world"), tt, to!T("r"))
                       == to!S("qe55o wo5d"));
                assert(translate(to!S("hello world"), tt, to!T("helo"))
                       == to!S(" wrd"));
                // Removal is matched against the source characters, not the
                // translated ones, so "q5" removes nothing here.
                assert(translate(to!S("hello world"), tt, to!T("q5"))
                       == to!S("qe55o wor5d"));
            }
        }();

        // The result type equals the input string type.
        auto s = to!S("hello world");
        dchar[dchar] transTable = ['h' : 'q', 'l' : '5'];
        static assert(is(typeof(s) == typeof(translate(s, transTable))));
    }
    });
}
4814
/++ Ditto +/
C1[] translate(C1, S, C2 = immutable char)(C1[] str,
                                           in S[dchar] transTable,
                                           const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2)
{
    import std.array : appender;

    // Same as the dchar-valued overload, except that each table entry is a
    // whole replacement string.
    auto sink = appender!(C1[])();
    str.translateImpl(transTable, toRemove, sink);
    return sink.data;
}
4826
// Exercises the string-valued (S[dchar]) overload of translate for every
// combination of char/wchar/dchar width and mutable/const/immutable qualifier
// of the input string (S), of the toRemove string (T) and of the table
// qualifier (R), both at run time and at compile time (via assertCTFEable).
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {
        assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o wor42d"));
        assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o \U00010143 wor42d"));
        assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d"));
        // An empty replacement string deletes the character.
        assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) ==
               to!S("ello \U00010143 world"));
        assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) ==
               to!S("hello world"));
        // A null table leaves the text unchanged.
        assert(translate(to!S("hello world"), cast(string[dchar]) null) == to!S("hello world"));

        foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396

            foreach (R; AliasSeq!(string[dchar], const string[dchar],
                        immutable string[dchar]))
            {
                R tt = ['h' : "yellow", 'l' : "42"];
                assert(translate(to!S("hello world"), tt, to!T("r")) ==
                       to!S("yellowe4242o wo42d"));
                assert(translate(to!S("hello world"), tt, to!T("helo")) ==
                       to!S(" wrd"));
                // Removal is matched against the source characters, not the
                // replacement text.
                assert(translate(to!S("hello world"), tt, to!T("y42")) ==
                       to!S("yellowe4242o wor42d"));
                assert(translate(to!S("hello world"), tt, to!T("hello world")) ==
                       to!S(""));
                assert(translate(to!S("hello world"), tt, to!T("42")) ==
                       to!S("yellowe4242o wor42d"));
            }
        }();

        // The result type equals the input string type.
        auto s = to!S("hello world");
        string[dchar] transTable = ['h' : "silly", 'l' : "putty"];
        static assert(is(typeof(s) == typeof(translate(s, transTable))));
    }
    });
}
4880
/++
    Overload of $(D translate) that writes the translated text to an existing
    output range instead of returning a newly allocated string.

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C1, C2 = immutable char, Buffer)(C1[] str,
                                                in dchar[dchar] transTable,
                                                const(C2)[] toRemove,
                                                Buffer buffer)
if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1))
{
    // All of the work happens in the shared implementation.
    str.translateImpl(transTable, toRemove, buffer);
}
4899
///
@safe pure unittest
{
    import std.array : appender;

    auto sink = appender!(dchar[])();

    // Character-to-character table.
    dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    translate("hello world", charMap, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // Same table, additionally removing 'l', 'o' and 'w'.
    sink.clear();
    translate("hello world", charMap, "low", sink);
    assert(sink.data == "h5 rd");

    // Character-to-string table.
    sink.clear();
    string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringMap, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
4918
@safe pure unittest // issue 13018
{
    import std.array : appender;

    // Same checks as the documented example, but with immutable tables.
    auto sink = appender!(dchar[])();

    immutable dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    translate("hello world", charMap, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    sink.clear();
    translate("hello world", charMap, "low", sink);
    assert(sink.data == "h5 rd");

    sink.clear();
    immutable string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringMap, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
4936
/++ Ditto +/
void translate(C1, S, C2 = immutable char, Buffer)(C1[] str,
                                                   in S[dchar] transTable,
                                                   const(C2)[] toRemove,
                                                   Buffer buffer)
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S))
{
    // String-valued table variant; the shared implementation does the work.
    str.translateImpl(transTable, toRemove, buffer);
}
4946
// Shared implementation of the AA-based translate overloads: walks str one
// decoded character at a time, skips those listed in toRemove and writes
// either the table entry or the character itself to buffer.
private void translateImpl(C1, T, C2, Buffer)(C1[] str,
                                              T transTable,
                                              const(C2)[] toRemove,
                                              Buffer buffer)
{
    // Set of characters to drop, stored as AA keys for lookup with `in`.
    bool[dchar] dropped;
    foreach (dchar ch; toRemove)
        dropped[ch] = true;

    foreach (dchar ch; str)
    {
        if ((ch in dropped) is null)
        {
            // `in` yields a pointer to the table entry, or null when absent.
            if (auto replacement = ch in transTable)
                put(buffer, *replacement);
            else
                put(buffer, ch);
        }
    }
}
4970
/++
    This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It
    will $(I not) work with Unicode. It exists as an optimization for the
    cases where Unicode processing is not necessary.

    Unlike the other overloads of $(LREF _translate), this one does not take
    an AA. Rather, it takes a $(D string) generated by $(LREF makeTransTable).

    The array generated by $(D makeTransTable) is $(D 256) elements long such
    that the index is equal to the ASCII character being replaced and the
    value is equal to the character that it's being replaced with. Note that
    translate does not decode any of the characters, so you can actually pass
    it Extended ASCII characters if you want to (ASCII only actually uses
    $(D 128) characters), but be warned that Extended ASCII characters are not
    valid Unicode and therefore will result in a $(D UTFException) being
    thrown from most other Phobos functions.

    Also, because no decoding occurs, it is possible to use this overload to
    translate ASCII characters within a proper UTF-8 string without altering
    the other, non-ASCII characters. UTF validation issues arise only when a
    code unit greater than $(D 127) is replaced with another code unit, or
    when any code unit is replaced with one greater than $(D 127).

    See_Also:
        $(LREF tr)
        $(REF replace, std,array)

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
  +/
C[] translate(C = immutable char)(in char[] str, in char[] transTable, in char[] toRemove = null) @trusted pure nothrow
if (is(Unqual!C == char))
in
{
    assert(transTable.length == 256);
}
body
{
    // One flag per possible code unit; set for those that must be dropped.
    bool[256] isRemoved = false;
    foreach (char unit; toRemove)
        isRemoved[unit] = true;

    // First pass: size of the result, so it is allocated exactly once.
    size_t kept = 0;
    foreach (char unit; str)
        kept += isRemoved[unit] ? 0 : 1;

    auto result = new char[kept];

    // Second pass: translate every surviving code unit through the table.
    size_t pos = 0;
    foreach (char unit; str)
    {
        if (isRemoved[unit])
            continue;
        result[pos] = transTable[unit];
        ++pos;
    }

    return cast(C[]) result;
}
5035
5036
/**
 * Builds the same translation table as $(LREF makeTransTable), but returns
 * it as a $(D string) allocated on the GC heap.
 *
 * Use $(LREF makeTransTable) instead.
 */
string makeTrans(in char[] from, in char[] to) @trusted pure nothrow
{
    // Build the fixed-size table, then copy it to the GC heap.
    char[256] table = makeTransTable(from, to);
    return table[].idup;
}
5047
///
@safe pure nothrow unittest
{
    // 'e' -> '5', 'o' -> '7', '5' -> 'q'
    auto table = makeTrans("eo5", "57q");
    assert("hello world".translate(table) == "h5ll7 w7rld");

    // Additionally remove 'l', 'o' and 'w'.
    assert("hello world".translate(table, "low") == "h5 rd");
}
5056
/*******
 * Builds a 256-entry translation table in which every character of from[]
 * maps to the character at the same position in to[]; all other entries map
 * to themselves.
 *
 * Params:
 *      from = array of chars, less than or equal to 256 in length
 *      to = corresponding array of chars to translate to
 * Returns:
 *      translation array
 */

char[256] makeTransTable(in char[] from, in char[] to) @safe pure nothrow @nogc
in
{
    import std.ascii : isASCII;
    assert(from.length == to.length);
    assert(from.length <= 256);
    foreach (char c; from)
        assert(isASCII(c));
    foreach (char c; to)
        assert(isASCII(c));
}
body
{
    char[256] table = void;

    // Start from the identity mapping ...
    foreach (idx, ref slot; table)
        slot = cast(char) idx;

    // ... then overwrite the entries named in `from`, in order.
    foreach (idx; 0 .. from.length)
        table[from[idx]] = to[idx];

    return table;
}
5089
// Exercises the ASCII-only translate overload together with makeTransTable
// for every qualifier of the result element type (C), of the input string (S)
// and of the toRemove string (T), both at run time and at compile time
// (via assertCTFEable).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (C; AliasSeq!(char, const char, immutable char))
    {
        assert(translate!C("hello world", makeTransTable("hl", "q5")) == to!(C[])("qe55o wor5d"));

        // The explicit template argument C decides the result element type.
        auto s = to!(C[])("hello world");
        auto transTable = makeTransTable("hl", "q5");
        static assert(is(typeof(s) == typeof(translate!C(s, transTable))));
    }

    foreach (S; AliasSeq!(char[], const(char)[], immutable(char)[]))
    {
        assert(translate(to!S("hello world"), makeTransTable("hl", "q5")) == to!S("qe55o wor5d"));
        // Non-ASCII code units pass through untouched.
        assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5")) ==
               to!S("qe55o \U00010143 wor5d"));
        assert(translate(to!S("hello world"), makeTransTable("ol", "1o")) == to!S("heoo1 w1rod"));
        assert(translate(to!S("hello world"), makeTransTable("", "")) == to!S("hello world"));
        assert(translate(to!S("hello world"), makeTransTable("12345", "67890")) == to!S("hello world"));
        assert(translate(to!S("hello \U00010143 world"), makeTransTable("12345", "67890")) ==
               to!S("hello \U00010143 world"));

        foreach (T; AliasSeq!(char[], const(char)[], immutable(char)[]))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("r")) ==
                   to!S("qe55o wo5d"));
            assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5"), to!T("r")) ==
                   to!S("qe55o \U00010143 wo5d"));
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("helo")) ==
                   to!S(" wrd"));
            // Removal is matched against the source characters, so "q5"
            // removes nothing here.
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("q5")) ==
                   to!S("qe55o wor5d"));
        }();
    }
    });
}
5131
/++
    This is an $(I $(RED ASCII-only)) overload of $(D translate) which writes
    its result to an existing output range instead of allocating a new string.

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C = immutable char, Buffer)(in char[] str, in char[] transTable,
        in char[] toRemove, Buffer buffer) @trusted pure
if (is(Unqual!C == char) && isOutputRange!(Buffer, char))
in
{
    assert(transTable.length == 256);
}
body
{
    // One flag per possible code unit; set for those that must be dropped.
    bool[256] isRemoved = false;
    foreach (char unit; toRemove)
        isRemoved[unit] = true;

    // Emit the table entry of every code unit that is not dropped.
    foreach (char unit; str)
    {
        if (isRemoved[unit])
            continue;
        put(buffer, transTable[unit]);
    }
}
5162
///
@safe pure unittest
{
    import std.array : appender;

    // 'e' -> '5', 'o' -> '7', '5' -> 'q'
    auto table = makeTransTable("eo5", "57q");
    auto sink = appender!(char[])();

    translate("hello world", table, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // Reuse the buffer; additionally remove 'l', 'o' and 'w'.
    sink.clear();
    translate("hello world", table, "low", sink);
    assert(sink.data == "h5 rd");
}
5176
//@@@DEPRECATED_2018-05@@@
/***********************************************
 * $(RED This function is deprecated and will be removed May 2018.)
 * Please use the functions in $(MREF std, regex) and $(MREF std, algorithm)
 * instead. If you still need this function, it will be available in
 * $(LINK2 https://github.com/dlang/undeaD, undeaD).
 *
 * Tests whether character c matches the pattern.
 * Patterns:
 *
 *  A $(I pattern) is an array of characters much like a $(I character
 *  class) in regular expressions. A sequence of characters
 *  can be given, such as "abcde". The '-' can represent a range
 *  of characters, as "a-e" represents the same pattern as "abcde".
 *  "a-fA-F0-9" represents all the hex characters.
 *  If the first character of a pattern is '^', then the pattern
 *  is negated, i.e. "^0-9" means any character except a digit.
 *  The functions inPattern, $(B countchars), $(B removechars),
 *  and $(B squeeze) use patterns.
 *
 * Note: In the future, the pattern syntax may be improved
 *  to be more like regular expression character classes.
 */
deprecated("This function is obsolete and will be removed May 2018. See the docs for more details")
bool inPattern(S)(dchar c, in S pattern) @safe pure @nogc
if (isSomeString!S)
{
    bool negated = false;   // pattern starts with '^'
    bool inRange = false;   // previous pattern character was a range '-'
    dchar rangeStart;       // last ordinary pattern character seen

    foreach (size_t idx, dchar p; pattern)
    {
        if (idx == 0 && p == '^')
        {
            negated = true;
            // A pattern consisting solely of "^" matches the caret itself.
            if (pattern.length == 1)
                return c == p;
        }
        else if (inRange)
        {
            // p closes the range rangeStart-p; the upper bound always
            // matches, even when the range is written backwards.
            inRange = false;
            if ((rangeStart <= c && c <= p) || c == p)
                return !negated;
        }
        else if (p == '-' && idx > negated && idx + 1 < pattern.length)
        {
            // '-' is a range operator only when it has a character on both
            // sides (not counting a leading '^').
            inRange = true;
            continue;
        }
        else if (c == p)
        {
            return !negated;
        }
        rangeStart = p;
    }
    return negated;
}
5233
5234
deprecated
@safe pure @nogc unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Plain character lists.
    assert( inPattern('x', "x"));
    assert(!inPattern('x', "y"));
    assert(!inPattern('x', string.init));
    assert( inPattern('x', "^y"));
    assert( inPattern('x', "yxxy"));
    assert(!inPattern('x', "^yxxy"));
    assert( inPattern('x', "^abcd"));
    // Carets.
    assert(!inPattern('^', "^^"));
    assert( inPattern('^', "^"));
    assert( inPattern('^', "a^"));
    // Ranges.
    assert( inPattern('x', "a-z"));
    assert(!inPattern('x', "A-Z"));
    assert(!inPattern('x', "^a-z"));
    assert( inPattern('x', "^A-Z"));
    // A trailing '-' is an ordinary character.
    assert( inPattern('-', "a-"));
    assert(!inPattern('-', "^A-"));
    // Backwards ranges match only their end points.
    assert( inPattern('a', "z-a"));
    assert( inPattern('z', "z-a"));
    assert(!inPattern('x', "z-a"));
    });
}
5264
//@@@DEPRECATED_2018-05@@@
/***********************************************
 * $(RED This function is deprecated and will be removed May 2018.)
 * Please use the functions in $(MREF std, regex) and $(MREF std, algorithm)
 * instead. If you still need this function, it will be available in
 * $(LINK2 https://github.com/dlang/undeaD, undeaD).
 *
 * See if character c is in the intersection of the patterns, i.e. whether it
 * matches every one of them. An empty array of patterns matches any character.
 *
 * Params:
 *      c = the character to test
 *      patterns = the patterns that c must all match
 * Returns:
 *      $(D true) if c matches every pattern, $(D false) otherwise
 */
deprecated("This function is obsolete and will be removed May 2018. See the docs for more details")
bool inPattern(S)(dchar c, S[] patterns) @safe pure @nogc
if (isSomeString!S)
{
    // Let the element type be inferred: forcing `string` here would reject
    // every other string type that the template constraint admits.
    foreach (pattern; patterns)
    {
        if (!inPattern(c, pattern))
        {
            return false;
        }
    }
    return true;
}
5287
//@@@DEPRECATED_2018-05@@@
/********************************************
 * $(RED This function is deprecated and will be removed May 2018.)
 * Please use the functions in $(MREF std, regex) and $(MREF std, algorithm)
 * instead. If you still need this function, it will be available in
 * $(LINK2 https://github.com/dlang/undeaD, undeaD).
 *
 * Returns the number of characters in s that match pattern.
 */
deprecated("This function is obsolete and will be removed May 2018. See the docs for more details")
size_t countchars(S, S1)(S s, in S1 pattern) @safe pure @nogc
if (isSomeString!S && isSomeString!S1)
{
    size_t matches = 0;
    // Iterate by decoded character, not by code unit.
    foreach (dchar ch; s)
    {
        if (inPattern(ch, pattern))
            ++matches;
    }
    return matches;
}
5308
deprecated
@safe pure @nogc unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Range pattern: every character matches.
    assert("abc".countchars("a-c") == 3);
    // List pattern: two 'o's and one 'r'.
    assert("hello world".countchars("or") == 3);
    });
}
5321
//@@@DEPRECATED_2018-05@@@
/********************************************
 * $(RED This function is deprecated and will be removed May 2018.)
 * Please use the functions in $(MREF std, regex) and $(MREF std, algorithm)
 * instead. If you still need this function, it will be available in
 * $(LINK2 https://github.com/dlang/undeaD, undeaD).
 *
 * Returns s with every character that matches pattern removed. When nothing
 * matches, s itself is returned.
 */
deprecated("This function is obsolete and will be removed May 2018. See the docs for more details")
S removechars(S)(S s, in S pattern) @safe pure
if (isSomeString!S)
{
    import std.utf : encode;

    Unqual!(typeof(s[0]))[] kept;   // filtered copy, created on the first match
    bool copying = false;           // true once a match has been found

    foreach (size_t pos, dchar ch; s)
    {
        immutable matched = inPattern(ch, pattern);
        if (!matched)
        {
            // Before the first match the original still serves as the result.
            if (copying)
                encode(kept, ch);
        }
        else if (!copying)
        {
            // First match: everything before it is kept verbatim.
            copying = true;
            kept = s[0 .. pos].dup;
        }
    }

    if (!copying)
        return s;
    return kept;
}
5361
deprecated
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Everything removed.
    assert("abc".removechars("a-c").length == 0);
    // Matches in the middle, at the end, and at both ends.
    assert("hello world".removechars("or") == "hell wld");
    assert("hello world".removechars("d") == "hello worl");
    assert("hah".removechars("h") == "a");
    });
}
5376
deprecated
@safe pure unittest
{
    // Nothing matches: the text comes back unchanged.
    immutable untouched = removechars("abc", "x");
    assert(untouched == "abc");
}
5382
//@@@DEPRECATED_2018-05@@@
/***************************************************
 * $(RED This function is deprecated and will be removed May 2018.)
 * Please use the functions in $(MREF std, regex) and $(MREF std, algorithm)
 * instead. If you still need this function, it will be available in
 * $(LINK2 https://github.com/dlang/undeaD, undeaD).
 *
 * Returns s with every run of a repeated character that matches pattern[]
 * reduced to a single instance of that character.
 * If pattern is null, it defaults to all characters.
 * When nothing is squeezed, s itself is returned; when only the tail of s
 * is affected, a slice of s is returned.
 */
deprecated("This function is obsolete and will be removed May 2018. See the docs for more details")
S squeeze(S)(S s, in S pattern = null)
{
    import std.utf : encode, stride;

    Unqual!(typeof(s[0]))[] buf;    // squeezed copy, created lazily
    dchar previous;                 // last squeezable character seen
    size_t prefixEnd;               // end of the prefix of s that is still valid output
    bool inRun;                     // the previous character was squeezable
    bool modified;                  // at least one character has been dropped

    foreach (size_t pos, dchar ch; s)
    {
        if (inRun && previous == ch)
        {
            // Repeat of a squeezable character: drop it.
            modified = true;
            continue;
        }

        immutable squeezable = pattern is null || inPattern(ch, pattern);
        inRun = squeezable;

        if (modified)
        {
            // Once something was dropped, output goes to the copy.
            if (buf is null)
                buf = s[0 .. prefixEnd].dup;
            encode(buf, ch);
        }
        else if (squeezable)
        {
            // Nothing dropped yet: remember how much of s is valid output.
            prefixEnd = pos + stride(s, pos);
        }

        if (squeezable)
            previous = ch;
    }

    if (!modified)
        return s;
    if (buf is null)
        return s[0 .. prefixEnd];   // only trailing repeats were dropped
    return cast(S) buf;
}
5437
deprecated
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    assert(squeeze("hello") == "helo");

    // Nothing to squeeze: the very same string comes back.
    string text = "abcd";
    assert(squeeze(text) is text);

    // Only the tail is squeezed: the result is a slice of the input.
    text = "xyzz";
    assert(squeeze(text).ptr == text.ptr); // should just be a slice

    // With a pattern, only the listed characters are squeezed.
    assert(squeeze("hello goodbyee", "oe") == "hello godbye");
    });
}
5458
//@@@DEPRECATED_2018-05@@@
/***************************************************************
 $(RED This function is deprecated and will be removed May 2018.)
 Please use the functions in $(MREF std, regex) and $(MREF std, algorithm)
 instead. If you still need this function, it will be available in
 $(LINK2 https://github.com/dlang/undeaD, undeaD).

 Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
 s) that does not match $(D_PARAM pattern) (in the terminology used by
 $(REF inPattern, std,string)). Updates $(D_PARAM s =
 s[pos..$]). Returns the slice from the beginning of the original
 (before update) string up to, and excluding, $(D_PARAM pos).

 The $(D_PARAM munch) function is mostly convenient for skipping
 certain category of characters (e.g. whitespace) when parsing
 strings. (In such cases, the return value is not used.)
 */
deprecated("This function is obsolete and will be removed May 2018. See the docs for more details")
S1 munch(S1, S2)(ref S1 s, S2 pattern) @safe pure @nogc
{
    // Index of the first non-matching character; the whole string when
    // every character matches.
    size_t cut = s.length;
    foreach (pos, dchar ch; s)
    {
        if (inPattern(ch, pattern))
            continue;
        cut = pos;
        break;
    }

    // Split off the matching prefix and advance the caller's string past it.
    auto eaten = s[0 .. cut];
    s = s[cut .. $];
    return eaten;
}
5491
///
deprecated
@safe pure @nogc unittest
{
    string rest = "123abc";

    // The leading digits are split off ...
    string eaten = munch(rest, "0123456789");
    assert(eaten == "123" && rest == "abc");

    // ... and a second call finds nothing more to eat.
    eaten = munch(rest, "0123456789");
    assert(eaten == "" && rest == "abc");
}
5502
deprecated
@safe pure @nogc unittest
{
    // Same as the documented example, with multi-byte characters involved.
    string rest = "123€abc";

    string eaten = munch(rest, "0123456789");
    assert(eaten == "123" && rest == "€abc");

    eaten = munch(rest, "0123456789");
    assert(eaten == "" && rest == "€abc");

    eaten = munch(rest, "£$€¥");
    assert(eaten == "€" && rest == "abc");
}
5514
5515
/**********************************************
 * Return string that is the 'successor' to s[].
 *
 * If the rightmost character is a-zA-Z0-9, it is incremented within
 * its case or digits. If that produces a carry ('9' wraps to '0',
 * 'z' to 'a', 'Z' to 'A'), the process is repeated with the character
 * to its immediate left. When the carry runs off the left end, the result
 * is one character longer: '1' is prepended after a digit, 'a' or 'A'
 * after a letter.
 *
 * If s is empty or its last character is not ASCII alphanumeric, s itself
 * is returned. A non-alphanumeric character reached while carrying stops
 * the process and is left unchanged.
 */

S succ(S)(S s) @safe pure
if (isSomeString!S)
{
    import std.ascii : isAlphaNum;

    if (s.length && isAlphaNum(s[$ - 1]))
    {
        auto work = s.dup;
        size_t pos = work.length - 1;

        while (true)
        {
            dchar cur = s[pos];
            dchar carry;

            if (cur == '9')
            {
                cur = '0';
                carry = '1';
            }
            else if (cur == 'z' || cur == 'Z')
            {
                // Wrap back to the first letter of the same case.
                cur -= 'Z' - 'A';
                carry = cur;
            }
            else
            {
                // No wrap-around here: bump the character if possible
                // and finish.
                if (isAlphaNum(cur))
                    work[pos]++;
                return work;
            }

            // The character wrapped; store it and carry to the left.
            work[pos] = cast(char) cur;
            if (pos == 0)
            {
                // Carry out of the leftmost position: grow by one.
                auto grown = new typeof(work[0])[work.length + 1];
                grown[0] = cast(char) carry;
                grown[1 .. $] = work[];
                return grown;
            }
            pos--;
        }
    }
    return s;
}
5569
///
@safe pure unittest
{
    assert(succ("1") == "2");
    // A carry out of the leftmost position lengthens the string.
    assert(succ("9") == "10");
    assert(succ("999") == "1000");
    assert(succ("zz99") == "aaa00");
}

// Runs the same cases at compile time and at run time, plus the inputs
// that are returned unchanged (empty string, non-alphanumeric tail).
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        assert(succ(string.init) is null);
        assert(succ("!@#$%") == "!@#$%");
        assert(succ("1") == "2");
        assert(succ("9") == "10");
        assert(succ("999") == "1000");
        assert(succ("zz99") == "aaa00");
    });
}
5594
5595
/++
    Replaces the characters in $(D str) which are in $(D from) with
    the corresponding characters in $(D to) and returns the resulting string.

    $(D tr) is based on
    $(HTTP pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr),
    though it doesn't do everything that the Posix utility does.

    Params:
        str       = The original string.
        from      = The characters to replace.
        to        = The characters to replace with.
        modifiers = String containing modifiers.

    Returns:
        The translated text, accumulated in an appender of $(D C1[]).

    Modifiers:
        $(BOOKTABLE,
        $(TR $(TD Modifier) $(TD Description))
        $(TR $(TD $(D 'c')) $(TD Complement the list of characters in $(D from)))
        $(TR $(TD $(D 'd')) $(TD Removes matching characters with no corresponding
                                 replacement in $(D to)))
        $(TR $(TD $(D 's')) $(TD Removes adjacent duplicates in the replaced
                                 characters))
        )

        Any other modifier character triggers $(D assert(0)).

        If the modifier $(D 'd') is present, then the number of characters in
        $(D to) may be only $(D 0) or $(D 1).

        If the modifier $(D 'd') is $(I not) present, and $(D to) is empty, then
        $(D to) is taken to be the same as $(D from).

        If the modifier $(D 'd') is $(I not) present, and $(D to) is shorter than
        $(D from), then $(D to) is extended by replicating the last character in
        $(D to).

        Both $(D from) and $(D to) may contain ranges using the $(D '-') character
        (e.g. $(D "a-d") is synonymous with $(D "abcd").) Neither accept a leading
        $(D '^') as meaning the complement of the string (use the $(D 'c') modifier
        for that).
  +/
C1[] tr(C1, C2, C3, C4 = immutable char)
       (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null)
{
    import std.array : appender;
    import std.conv : conv_to = to;
    import std.utf : decode;

    bool mod_c;
    bool mod_d;
    bool mod_s;

    // Parse the modifier string; anything but 'c', 'd' or 's' is rejected.
    foreach (char c; modifiers)
    {
        switch (c)
        {
        case 'c':   mod_c = 1; break;   // complement
        case 'd':   mod_d = 1; break;   // delete unreplaced chars
        case 's':   mod_s = 1; break;   // squeeze duplicated replaced chars
        default:    assert(0);
        }
    }

    // Without 'd', an empty replacement set means "same as from".
    if (to.empty && !mod_d)
        to = conv_to!(typeof(to))(from);

    auto result = appender!(C1[])();
    bool modified;      // true when the previously emitted character was a replacement
    dchar lastc;        // the previously emitted character (used by 's')

    foreach (dchar c; str)
    {
        dchar lastf;
        dchar lastt;
        dchar newc;
        int n = 0;      // position of c within the expanded from[] list

        // Scan from[], expanding "x-y" ranges on the fly, looking for c.
        for (size_t i = 0; i < from.length; )
        {
            immutable f = decode(from, i);
            // A '-' only denotes a range when it has a character on both sides.
            if (f == '-' && lastf != dchar.init && i < from.length)
            {
                immutable nextf = decode(from, i);
                if (lastf <= c && c <= nextf)
                {
                    // lastf was already counted (n was incremented for it),
                    // hence the -1.
                    n += c - lastf - 1;
                    if (mod_c)
                        goto Lnotfound;
                    goto Lfound;
                }
                // Skip over the rest of the range (lastf itself was counted).
                n += nextf - lastf;
                lastf = lastf.init;
                continue;
            }

            if (c == f)
            {   if (mod_c)
                    goto Lnotfound;
                goto Lfound;
            }
            lastf = f;
            n++;
        }
        // c is not in from[]: untouched normally, a match when complemented.
        if (!mod_c)
            goto Lnotfound;
        n = 0;                  // consider it 'found' at position 0

      Lfound:

        // Find the nth character in to[]
        dchar nextt;
        for (size_t i = 0; i < to.length; )
        {
            immutable t = decode(to, i);
            if (t == '-' && lastt != dchar.init && i < to.length)
            {
                nextt = decode(to, i);
                n -= nextt - lastt;
                if (n < 0)
                {
                    // The nth character lies inside this range.
                    newc = nextt + n + 1;
                    goto Lnewc;
                }
                lastt = dchar.init;
                continue;
            }
            if (n == 0)
            {   newc = t;
                goto Lnewc;
            }
            lastt = t;
            nextt = t;
            n--;
        }
        // to[] ran out before position n: drop the character under 'd',
        // otherwise reuse the last character seen in to[].
        if (mod_d)
            continue;
        newc = nextt;

      Lnewc:
        // 's': suppress a replacement equal to the replacement just emitted.
        if (mod_s && modified && newc == lastc)
            continue;
        result.put(newc);
        assert(newc != dchar.init);
        modified = true;
        lastc = newc;
        continue;

      Lnotfound:
        // Character passes through unchanged.
        result.put(c);
        lastc = c;
        modified = false;
    }

    return result.data;
}
5749
// Exercises tr over combinations of string types for str, from and to,
// both at compile time and at run time.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Complete list of test types; too slow to test'em all
    // alias TestTypes = AliasSeq!(
    //          char[], const( char)[], immutable( char)[],
    //         wchar[], const(wchar)[], immutable(wchar)[],
    //         dchar[], const(dchar)[], immutable(dchar)[]);

    // Reduced list of test types
    alias TestTypes = AliasSeq!(char[], const(wchar)[], immutable(dchar)[]);

    assertCTFEable!(
    {
    foreach (S; TestTypes)
    {
        foreach (T; TestTypes)
        {
            foreach (U; TestTypes)
            {
                assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef"));
                assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef"));
                assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd"));
                assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye"));
                assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye"));
                assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul "));
                assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc"));
            }
        }

        // The result type matches the type of the input string.
        auto s = to!S("hello world");
        static assert(is(typeof(s) == typeof(tr(s, "he", "if"))));
    }
    });
}

// An unknown modifier character must trip the assert(0) in tr.
@system pure unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown;
    assertThrown!AssertError(tr("abcdef", "cd", "CD", "X"));
}
5799
/**
 * Takes a string $(D s) and determines if it represents a number. This function
 * also takes an optional parameter, $(D bAllowSep), which will accept the
 * separator characters $(D ',') and $(D '__') within the string. But these
 * characters should be stripped from the string before using any
 * of the conversion functions like $(D to!int()), $(D to!float()), and etc
 * else an error will occur.
 *
 * Also please note, that no spaces are allowed within the string
 * anywhere whether it's a leading, trailing, or embedded space(s),
 * thus they too must be stripped from the string before using this
 * function, or any of the conversion functions.
 *
 * Only ASCII input can be numeric; any non-ASCII code unit makes the
 * function return $(D false). An exponent marker ($(D 'e') or $(D 'E'))
 * must be followed by an explicit $(D '+') or $(D '-') sign.
 *
 * Params:
 *     s = the string or random access range to check
 *     bAllowSep = accept separator characters or not
 *
 * Returns:
 *     $(D true) if $(D s) is accepted as a number by the rules above,
 *     $(D false) otherwise (including when $(D s) is empty).
 */
bool isNumeric(S)(S s, bool bAllowSep = false)
if (isSomeString!S ||
    (isRandomAccessRange!S &&
    hasSlicing!S &&
    isSomeChar!(ElementType!S) &&
    !isInfinite!S))
{
    import std.algorithm.comparison : among;
    import std.ascii : isASCII;

    // ASCII only case insensitive comparison with two ranges
    static bool asciiCmp(S1)(S1 a, string b)
    {
        import std.algorithm.comparison : equal;
        import std.algorithm.iteration : map;
        import std.ascii : toLower;
        import std.utf : byChar;
        return a.map!toLower.equal(b.byChar.map!toLower);
    }

    // auto-decoding special case, we're only comparing characters
    // in the ASCII range so there's no reason to decode
    static if (isSomeString!S)
    {
        import std.utf : byCodeUnit;
        auto codeUnits = s.byCodeUnit;
    }
    else
    {
        alias codeUnits = s;
    }

    if (codeUnits.empty)
        return false;

    // Check for NaN (Not a Number) and for Infinity
    // (a.save keeps each comparison from consuming the range)
    if (codeUnits.among!((a, b) => asciiCmp(a.save, b))
            ("nan", "nani", "nan+nani", "inf", "-inf"))
        return true;

    // Consume at most one leading sign; indices below are relative to
    // the remainder.
    immutable frontResult = codeUnits.front;
    if (frontResult == '-' || frontResult == '+')
        codeUnits.popFront;

    immutable iLen = codeUnits.length;
    bool bDecimalPoint, bExponent, bComplex, sawDigits;

    for (size_t i = 0; i < iLen; i++)
    {
        immutable c = codeUnits[i];

        if (!c.isASCII)
            return false;

        // Digits are good, skip to the next character
        if (c >= '0' && c <= '9')
        {
            sawDigits = true;
            continue;
        }

        // Check for the complex type, and if found
        // reset the flags for checking the 2nd number.
        if (c == '+')
        {
            // A '+' right after the (optional) leading sign is invalid.
            if (!i)
                return false;
            bDecimalPoint = false;
            bExponent = false;
            bComplex = true;
            sawDigits = false;
            continue;
        }

        // Allow only one exponent per number
        if (c == 'e' || c == 'E')
        {
            // A 2nd exponent found, return not a number
            // (also rejects an exponent marker as the last character)
            if (bExponent || i + 1 >= iLen)
                return false;
            // Look forward for the sign, and if
            // missing then this is not a number.
            if (codeUnits[i + 1] != '-' && codeUnits[i + 1] != '+')
                return false;
            bExponent = true;
            i++; // step over the exponent sign
            continue;
        }
        // Allow only one decimal point per number to be used
        if (c == '.')
        {
            // A 2nd decimal point found, return not a number
            if (bDecimalPoint)
                return false;
            bDecimalPoint = true;
            continue;
        }
        // Check for ending literal characters: "f,u,l,i,ul,fi,li",
        // and whether they're being used with the correct datatype.
        // NOTE(review): any non-digit, non-'+', non-'e', non-'.' character in
        // the last two positions ends up in the suffix checks below, so a
        // separator there is rejected even when bAllowSep is true - confirm
        // whether that is intended.
        if (i == iLen - 2)
        {
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Floating-Point Number
            if (codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))("fi", "li") &&
                    (bDecimalPoint || bExponent || bComplex))
                return true;
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (bDecimalPoint || bExponent || bComplex))
                return false;
            // Could be a Integer or a Float, thus
            // all these suffixes are valid for both
            return codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))
                ("ul", "fi", "li") != 0;
        }
        if (i == iLen - 1)
        {
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (c.among!('u', 'l', 'U', 'L')() &&
                   (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Check to see if the last character in the string
            // is the required 'i' character
            if (bComplex)
                return c.among!('i', 'I')() != 0;
            // Floating-Point Number
            return c.among!('l', 'L', 'f', 'F', 'i', 'I')() != 0;
        }
        // Check if separators are allowed to be in the numeric string
        if (!bAllowSep || !c.among!('_', ',')())
            return false;
    }

    // Every character was consumed without a suffix deciding the result:
    // numeric only if the (last) number part contained at least one digit.
    return sawDigits;
}
5961
/**
 * Integer Whole Number: (byte, ubyte, short, ushort, int, uint, long, and ulong)
 * ['+'|'-']digit(s)[U|L|UL]
 */
@safe @nogc pure nothrow unittest
{
    assert(isNumeric("123"));
    assert(isNumeric("123UL"));
    assert(isNumeric("123L"));
    assert(isNumeric("+123U"));
    assert(isNumeric("-123L"));
}

/**
 * Floating-Point Number: (float, double, real, ifloat, idouble, and ireal)
 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
 *      or [nan|nani|inf|-inf]
 */
@safe @nogc pure nothrow unittest
{
    assert(isNumeric("+123"));
    assert(isNumeric("-123.01"));
    assert(isNumeric("123.3e-10f"));
    assert(isNumeric("123.3e-10fi"));
    assert(isNumeric("123.3e-10L"));

    assert(isNumeric("nan"));
    assert(isNumeric("nani"));
    assert(isNumeric("-inf"));
}

/**
 * Floating-Point Number: (cfloat, cdouble, and creal)
 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
 *         [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
 *      or [nan|nani|nan+nani|inf|-inf]
 */
@safe @nogc pure nothrow unittest
{
    assert(isNumeric("-123e-1+456.9e-10Li"));
    assert(isNumeric("+123e+10+456i"));
    assert(isNumeric("123+456"));
}

// Inputs that must be rejected: bare suffixes, bare signs and punctuation,
// empty input, repeated or unsigned exponents, and non-ASCII data.
@safe @nogc pure nothrow unittest
{
    assert(!isNumeric("F"));
    assert(!isNumeric("L"));
    assert(!isNumeric("U"));
    assert(!isNumeric("i"));
    assert(!isNumeric("fi"));
    assert(!isNumeric("ul"));
    assert(!isNumeric("li"));
    assert(!isNumeric("."));
    assert(!isNumeric("-"));
    assert(!isNumeric("+"));
    assert(!isNumeric("e-"));
    assert(!isNumeric("e+"));
    assert(!isNumeric(".f"));
    assert(!isNumeric("e+f"));
    assert(!isNumeric("++1"));
    assert(!isNumeric(""));
    assert(!isNumeric("1E+1E+1"));
    assert(!isNumeric("1E1"));
    assert(!isNumeric("\x81"));
}

// Test string types
@safe unittest
{
    import std.conv : to;

    foreach (T; AliasSeq!(string, char[], wstring, wchar[], dstring, dchar[]))
    {
        assert("123".to!T.isNumeric());
        assert("123UL".to!T.isNumeric());
        assert("123fi".to!T.isNumeric());
        assert("123li".to!T.isNumeric());
        assert(!"--123L".to!T.isNumeric());
    }
}

// test ranges
@system pure unittest
{
    import std.range : refRange;
    import std.utf : byCodeUnit;

    assert("123".byCodeUnit.isNumeric());
    assert("123UL".byCodeUnit.isNumeric());
    assert("123fi".byCodeUnit.isNumeric());
    assert("123li".byCodeUnit.isNumeric());
    assert(!"--123L".byCodeUnit.isNumeric());

    // Reference-semantics ranges are accepted as well.
    dstring z = "0";
    assert(isNumeric(refRange(&z)));

    dstring nani = "nani";
    assert(isNumeric(refRange(&nani)));
}

/// isNumeric works with CTFE
@safe pure unittest
{
    enum a = isNumeric("123.00E-5+1234.45E-12Li");
    enum b = isNumeric("12345xxxx890");

    static assert( a);
    static assert(!b);
}

// Broader accept/reject table, evaluated both at compile time and at run time.
@system unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Test the isNumeric(in string) function
    assert(isNumeric("1") == true );
    assert(isNumeric("1.0") == true );
    assert(isNumeric("1e-1") == true );
    assert(isNumeric("12345xxxx890") == false );
    assert(isNumeric("567L") == true );
    assert(isNumeric("23UL") == true );
    assert(isNumeric("-123..56f") == false );
    assert(isNumeric("12.3.5.6") == false );
    assert(isNumeric(" 12.356") == false );
    assert(isNumeric("123 5.6") == false );
    assert(isNumeric("1233E-1+1.0e-1i") == true );

    assert(isNumeric("123.00E-5+1234.45E-12Li") == true);
    assert(isNumeric("123.00e-5+1234.45E-12iL") == false);
    assert(isNumeric("123.00e-5+1234.45e-12uL") == false);
    assert(isNumeric("123.00E-5+1234.45e-12lu") == false);

    assert(isNumeric("123fi") == true);
    assert(isNumeric("123li") == true);
    assert(isNumeric("--123L") == false);
    assert(isNumeric("+123.5UL") == false);
    assert(isNumeric("123f") == true);
    assert(isNumeric("123.u") == false);

  // @@@BUG@@ to!string(float) is not CTFEable.
  // Related: formatValue(T) if (is(FloatingPointTypeOf!T))
  if (!__ctfe)
  {
    assert(isNumeric(to!string(real.nan)) == true);
    assert(isNumeric(to!string(-real.infinity)) == true);
    assert(isNumeric(to!string(123e+2+1234.78Li)) == true);
  }

    // Slices of a larger string are judged on their own content.
    string s = "$250.99-";
    assert(isNumeric(s[1 .. s.length - 2]) == true);
    assert(isNumeric(s) == false);
    assert(isNumeric(s[0 .. s.length - 1]) == false);
    });

    assert(!isNumeric("-"));
    assert(!isNumeric("+"));
}
6123
/*****************************
 * Soundex algorithm.
 *
 * The Soundex algorithm converts a word into 4 characters
 * based on how the word sounds phonetically. The idea is that
 * two spellings that sound alike will have the same Soundex
 * value, which means that Soundex can be used for fuzzy matching
 * of names.
 *
 * Only the ASCII letters of the input take part; every other character
 * is skipped and merely separates letters for the purpose of
 * duplicate-code suppression.
 *
 * Params:
 *  str = String or InputRange to convert to Soundex representation.
 *
 * Returns:
 *  The four character array with the Soundex result in it
 *  (an upper-case letter followed by three digits, padded with '0').
 *  The array has zero's in it if there is no Soundex representation for the string.
 *
 * See_Also:
 *  $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
 *  $(LUCKY The Soundex Indexing System)
 *  $(LREF soundex)
 *
 * Bugs:
 *  Only works well with English names.
 *  There are other arguably better Soundex algorithms,
 *  but this one is the standard one.
 */
char[4] soundexer(Range)(Range str)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    // Soundex digit for each letter, indexed by (letter - 'A').
    static immutable dex =
        // ABCDEFGHIJKLMNOPQRSTUVWXYZ
          "01230120022455012623010202";

    char[4] code = void;
    size_t filled = 0;  // number of slots of code[] written so far
    C prev;             // digit of the previous letter; C.init means "none"

    foreach (C ch; str)
    {
        immutable lower = ch >= 'a' && ch <= 'z';
        immutable upper = ch >= 'A' && ch <= 'Z';

        // Non-letters are dropped and reset the duplicate tracking.
        if (!lower && !upper)
        {
            prev = prev.init;
            continue;
        }
        if (lower)
            ch -= 'a' - 'A';

        // The first letter is stored verbatim.
        if (filled == 0)
        {
            code[0] = cast(char) ch;
            filled++;
            prev = dex[ch - 'A'];
            continue;
        }

        // H and W are transparent: ignored without resetting prev.
        if (ch == 'H' || ch == 'W')
            continue;

        // A vowel separates equal codes so that they are both kept.
        if (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U')
            prev = prev.init;

        immutable C digit = dex[ch - 'A'];
        if (digit != '0' && digit != prev)
        {
            code[filled] = cast(char) digit;
            filled++;
            prev = digit;
        }
        if (filled == 4)
            break;
    }

    if (filled == 0)
        code[] = 0;             // no letters at all
    else
        code[filled .. 4] = '0'; // pad (no-op when all four slots are set)
    return code;
}
6205
// Overload for types satisfying isConvertibleToString: forwards to the
// range overload instantiated with StringTypeOf!Range.
char[4] soundexer(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    return soundexer!(StringTypeOf!Range)(str);
}
6211
/*****************************
 * Like $(LREF soundexer), but with different parameters
 * and return value.
 *
 * Params:
 *  str = String to convert to Soundex representation.
 *  buffer = Optional char array of at least 4 elements to put the
 *           resulting Soundex characters into; only the first four
 *           elements are written. If null, the return value
 *           buffer will be allocated on the heap.
 * Returns:
 *  The four character array with the Soundex result in it. When a
 *  $(D buffer) was supplied, this is the slice $(D buffer[0 .. 4]).
 *  Returns null if there is no Soundex representation for the string.
 * See_Also:
 *  $(LREF soundexer)
 */
char[] soundex(const(char)[] str, char[] buffer = null)
    @safe pure nothrow
in
{
    assert(buffer is null || buffer.length >= 4);
}
out (result)
{
    if (result !is null)
    {
        assert(result.length == 4);
        assert(result[0] >= 'A' && result[0] <= 'Z');
        foreach (char c; result[1 .. 4])
            assert(c >= '0' && c <= '6');
    }
}
body
{
    char[4] result = soundexer(str);
    // soundexer signals "no representation" with an all-zero array.
    if (result[0] == 0)
        return null;
    if (buffer is null)
        buffer = new char[4];
    // The in-contract admits buffers longer than four elements, so copy
    // into, and return, exactly the first four: a whole-array copy requires
    // equal lengths and the out-contract requires a length of 4.
    buffer[0 .. 4] = result[];
    return buffer[0 .. 4];
}
6253
6254
// Reference Soundex codes for soundex (with and without a caller-supplied
// buffer) and for soundexer over char, wchar and dchar ranges; evaluated
// both at compile time and at run time.
@safe pure nothrow unittest
{
    import std.exception : assertCTFEable;
    assertCTFEable!(
    {
    char[4] buffer;

    // Inputs without any letter have no Soundex representation.
    assert(soundex(null) == null);
    assert(soundex("") == null);
    assert(soundex("0123^&^^**&^") == null);
    assert(soundex("Euler") == "E460");
    assert(soundex(" Ellery ") == "E460");
    assert(soundex("Gauss") == "G200");
    assert(soundex("Ghosh") == "G200");
    assert(soundex("Hilbert") == "H416");
    assert(soundex("Heilbronn") == "H416");
    assert(soundex("Knuth") == "K530");
    assert(soundex("Kant", buffer) == "K530");
    assert(soundex("Lloyd") == "L300");
    assert(soundex("Ladd") == "L300");
    assert(soundex("Lukasiewicz", buffer) == "L222");
    assert(soundex("Lissajous") == "L222");
    assert(soundex("Robert") == "R163");
    assert(soundex("Rupert") == "R163");
    assert(soundex("Rubin") == "R150");
    assert(soundex("Washington") == "W252");
    assert(soundex("Lee") == "L000");
    assert(soundex("Gutierrez") == "G362");
    assert(soundex("Pfister") == "P236");
    assert(soundex("Jackson") == "J250");
    assert(soundex("Tymczak") == "T522");
    assert(soundex("Ashcraft") == "A261");

    assert(soundex("Woo") == "W000");
    assert(soundex("Pilgrim") == "P426");
    assert(soundex("Flingjingwaller") == "F452");
    assert(soundex("PEARSE") == "P620");
    assert(soundex("PIERCE") == "P620");
    assert(soundex("Price") == "P620");
    assert(soundex("CATHY") == "C300");
    assert(soundex("KATHY") == "K300");
    assert(soundex("Jones") == "J520");
    assert(soundex("johnsons") == "J525");
    assert(soundex("Hardin") == "H635");
    assert(soundex("Martinez") == "M635");

    import std.utf : byChar, byDchar, byWchar;
    assert(soundexer("Martinez".byChar ) == "M635");
    assert(soundexer("Martinez".byWchar) == "M635");
    assert(soundexer("Martinez".byDchar) == "M635");
    });
}

// soundexer must also accept the argument kinds exercised by testAliasedString.
@safe pure unittest
{
    assert(testAliasedString!soundexer("Martinez"));
}
6312
6313
/***************************************************
 * Construct an associative array consisting of all
 * abbreviations that uniquely map to the strings in values.
 *
 * This is useful in cases where the user is expected to type
 * in one of a known set of strings, and the program will helpfully
 * auto-complete the string once sufficient characters have been
 * entered that uniquely identify it.
 *
 * Each key is a prefix (cut on a code point boundary) of exactly one of
 * the distinct strings in values, or one of those strings in full; the
 * associated value is the full string. The caller's array is not modified.
 */

string[string] abbrev(string[] values) @safe pure
{
    import std.algorithm.sorting : sort;
    import std.utf : stride;

    string[string] table;

    // Work on a sorted private copy: equal strings become adjacent, and a
    // prefix can only clash with the nearest distinct neighbours.
    values = values.dup;
    sort(values);

    immutable count = values.length;
    size_t prevIdx = count;     // count doubles as "no previous value yet"
    size_t nextIdx;

    string nextVal;
    string prevVal;

    size_t cur = 0;
    while (cur < count)
    {
        string word = values[cur];

        // Advance nextIdx past duplicates of word; nextVal ends up holding
        // the next distinct string when there is one.
        nextIdx = cur + 1;
        while (nextIdx < count)
        {
            nextVal = values[nextIdx];
            if (word != nextVal)
                break;
            nextIdx++;
        }

        // Try every proper prefix of word, one code point at a time.
        for (size_t len = 0; len < word.length; len += stride(word, len))
        {
            string prefix = word[0 .. len];

            // Also a prefix of the following distinct string?
            if (nextIdx != count && len <= nextVal.length &&
                prefix == nextVal[0 .. len])
                continue;

            // Also a prefix of the preceding distinct string?
            if (prevIdx != count && len <= prevVal.length &&
                prefix == prevVal[0 .. len])
                continue;

            table[prefix] = word;
        }

        // The full string always maps to itself.
        table[word] = word;
        prevIdx = cur;
        prevVal = word;
        cur = nextIdx;
    }

    return table;
}
6372
///
@safe unittest
{
    import std.string;

    static string[] list = [ "food", "foxy" ];
    auto abbrevs = abbrev(list);
    // "f" and "fo" are shared by both words, so they are not keys.
    assert(abbrevs == ["fox": "foxy", "food": "food",
                       "foxy": "foxy", "foo": "food"]);
}


// Duplicate values and a value that is a prefix of another one; evaluated
// both at compile time and at run time.
@system pure unittest
{
    import std.algorithm.sorting : sort;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    string[] values;
    values ~= "hello";
    values ~= "hello";
    values ~= "he";

    string[string] r;

    r = abbrev(values);
    // Sort the keys so they can be checked in a fixed order.
    auto keys = r.keys.dup;
    sort(keys);

    assert(keys.length == 4);
    assert(keys[0] == "he");
    assert(keys[1] == "hel");
    assert(keys[2] == "hell");
    assert(keys[3] == "hello");

    assert(r[keys[0]] == "he");
    assert(r[keys[1]] == "hello");
    assert(r[keys[2]] == "hello");
    assert(r[keys[3]] == "hello");
    });
}
6416
6417
/******************************************
 * Compute _column number at the end of the printed form of the string,
 * assuming the string starts in the leftmost _column, which is numbered
 * starting from 0.
 *
 * Tab characters are expanded into enough spaces to bring the _column number
 * to the next multiple of tabsize.
 * If there are multiple lines in the string, the _column number of the last
 * line is returned. A line ends at any of $(D '\r'), $(D '\n'),
 * $(D lineSep), $(D paraSep) or $(D nelSep).
 *
 * Params:
 *    str = string or InputRange to be analyzed
 *    tabsize = number of columns a tab character represents
 *
 * Returns:
 *    column number
 */

size_t column(Range)(Range str, in size_t tabsize = 8)
if ((isInputRange!Range && isSomeChar!(Unqual!(ElementEncodingType!Range)) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    static if (is(Unqual!(ElementEncodingType!Range) == char))
    {
        // char input is decoded first, then handled by the branch below
        import std.utf : byDchar;

        return str.byDchar.column(tabsize);
    }
    else
    {
        // wchar and dchar elements are examined without decoding
        import std.uni : lineSep, paraSep, nelSep;

        size_t pos = 0;

        foreach (const unit; str)
        {
            if (unit == '\t')
            {
                // advance to the next tab stop
                pos = (pos + tabsize) / tabsize * tabsize;
            }
            else if (unit == '\r' || unit == '\n' ||
                     unit == paraSep || unit == lineSep || unit == nelSep)
            {
                // a line break starts over at the left margin
                pos = 0;
            }
            else
            {
                pos++;
            }
        }
        return pos;
    }
}
6479
///
@safe pure unittest
{
    import std.utf : byChar, byWchar, byDchar;

    assert(column("1234 ") == 5);
    assert(column("1234 "w) == 5);
    assert(column("1234 "d) == 5);

    assert(column("1234 ".byChar()) == 5);
    assert(column("1234 "w.byWchar()) == 5);
    assert(column("1234 "d.byDchar()) == 5);

    // Tab stops are set at 8 spaces by default; tab characters insert enough
    // spaces to bring the column position to the next multiple of 8.
    assert(column("\t") == 8);
    assert(column("1\t") == 8);
    assert(column("\t1") == 9);
    assert(column("123\t") == 8);

    // Other tab widths are possible by specifying it explicitly:
    assert(column("\t", 4) == 4);
    assert(column("1\t", 4) == 4);
    assert(column("\t1", 4) == 5);
    assert(column("123\t", 4) == 4);

    // New lines reset the column number.
    assert(column("abc\n") == 0);
    assert(column("abc\n1") == 1);
    assert(column("abcdefg\r1234") == 4);
    assert(column("abc\u20281") == 1);
    assert(column("abc\u20291") == 1);
    assert(column("abc\u00851") == 1);
    // U+0086 is not one of the recognized line separators, so it counts
    // as an ordinary character.
    assert(column("abc\u00861") == 5);
}
6515
// Overload for types satisfying isConvertibleToString: forwards to the
// range overload instantiated with StringTypeOf!Range.
size_t column(Range)(auto ref Range str, in size_t tabsize = 8)
if (isConvertibleToString!Range)
{
    return column!(StringTypeOf!Range)(str, tabsize);
}
6521
// column must also accept the argument kinds exercised by testAliasedString.
@safe pure unittest
{
    assert(testAliasedString!column("abc\u00861"));
}

// Empty inputs and tab expansion, evaluated both at compile time and at
// run time; the unittest itself is @nogc.
@safe @nogc unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    assert(column(string.init) == 0);
    assert(column("") == 0);
    assert(column("\t") == 8);
    assert(column("abc\t") == 8);
    assert(column("12345678\t") == 16);
    });
}
6541
/******************************************
 * Wrap text into a paragraph.
 *
 * The input text string s is formed into a paragraph
 * by breaking it up into a sequence of lines, delineated
 * by \n, such that the number of columns is not exceeded
 * on each line.
 * The last line is terminated with a \n.
 *
 * Words are the maximal runs of non-whitespace characters in s; runs of
 * whitespace between them are replaced by a single space or a line break.
 * A word is never split, so a word longer than the available width still
 * occupies a line of its own.
 *
 * Params:
 *     s = text string to be wrapped
 *     columns = maximum number of _columns in the paragraph
 *     firstindent = string used to _indent first line of the paragraph
 *     indent = string to use to _indent following lines of the paragraph
 *     tabsize = column spacing of tabs in firstindent[] and indent[]
 * Returns:
 *     resulting paragraph as an allocated string
 */

S wrap(S)(S s, in size_t columns = 80, S firstindent = null,
S indent = null, in size_t tabsize = 8)
if (isSomeString!S)
{
    import std.uni : isWhite;
    typeof(s.dup) result;
    bool inword;            // currently inside a word
    bool first = true;      // no word has been emitted yet
    size_t wordstart;       // index in s where the current word begins

    const indentcol = column(indent, tabsize);

    // Grow once to the expected size, then shrink back to the indent
    // before the words are appended.
    result.length = firstindent.length + s.length;
    result.length = firstindent.length;
    result[] = firstindent[];
    auto col = column(firstindent, tabsize);
    foreach (size_t i, dchar c; s)
    {
        if (isWhite(c))
        {
            if (inword)
            {
                // A word just ended at i: decide how to join it to the
                // output.
                if (first)
                {
                    // The first word directly follows firstindent.
                }
                else if (col + 1 + (i - wordstart) > columns)
                {
                    result ~= '\n';
                    result ~= indent;
                    col = indentcol;
                }
                else
                {
                    result ~= ' ';
                    col += 1;
                }
                result ~= s[wordstart .. i];
                col += i - wordstart;
                inword = false;
                first = false;
            }
        }
        else
        {
            if (!inword)
            {
                wordstart = i;
                inword = true;
            }
        }
    }

    // A word that runs up to the end of s has not been emitted yet.
    if (inword)
    {
        // Same fit test as inside the loop: break only when the word would
        // go past the limit. (Using >= here wrapped a final word that
        // exactly filled the line, unlike the same word followed by
        // whitespace.)
        if (col + 1 + (s.length - wordstart) > columns)
        {
            result ~= '\n';
            result ~= indent;
        }
        else if (result.length != firstindent.length)
            result ~= ' ';
        result ~= s[wordstart .. s.length];
    }
    result ~= '\n';

    return result;
}
6627
///
@safe pure unittest
{
    assert(wrap("a short string", 7) == "a short\nstring\n");

    // wrap will not break inside of a word, but at the next space
    assert(wrap("a short string", 4) == "a\nshort\nstring\n");

    // firstindent applies to the first line only, indent to the others
    assert(wrap("a short string", 7, "\t") == "\ta\nshort\nstring\n");
    assert(wrap("a short string", 7, "\t", " ") == "\ta\n short\n string\n");
}

// Edge cases (empty input, surrounding whitespace, over-long words, tabs in
// the indent), evaluated both at compile time and at run time.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    assert(wrap(string.init) == "\n");
    assert(wrap(" a b df ") == "a b df\n");
    assert(wrap(" a b df ", 3) == "a b\ndf\n");
    assert(wrap(" a bc df ", 3) == "a\nbc\ndf\n");
    assert(wrap(" abcd df ", 3) == "abcd\ndf\n");
    assert(wrap("x") == "x\n");
    assert(wrap("u u") == "u u\n");
    assert(wrap("abcd", 3) == "\nabcd\n");
    assert(wrap("a de", 10, "\t", " ", 8) == "\ta\n de\n");
    });
}
6658
/******************************************
 * Removes one level of indentation from a multi-line string.
 *
 * This uniformly outdents the text as much as possible.
 * Whitespace-only lines are always converted to blank lines.
 *
 * NOTE(review): this comment used to promise "Does not allocate memory if
 * it does not throw", but the implementation splits $(D str) into lines
 * and joins them again, which presumably allocates - confirm before
 * relying on either behaviour.
 *
 * Params:
 *     str = multi-line string
 *
 * Returns:
 *      outdented string
 *
 * Throws:
 *     StringException if indentation is done with different sequences
 *     of whitespace characters.
 */
S outdent(S)(S str) @safe pure
if (isSomeString!S)
{
    // Split into lines (keeping the terminators so join restores them),
    // outdent the lines with the array overload, and glue them back together.
    return str.splitLines(Yes.keepTerminator).outdent().join();
}
6682
///
@safe pure unittest
{
    // `pretty` initializes an enum, so outdent() is evaluated during
    // compilation here.
    // NOTE(review): the contents of both token strings (including the position
    // of their closing braces) are whitespace-sensitive - do not reformat.
    enum pretty = q{
import std.stdio;
void main() {
writeln("Hello");
}
}.outdent();

    // The expected text, written out by hand.
    enum ugly = q{
import std.stdio;
void main() {
writeln("Hello");
}
};

    assert(pretty == ugly);
}
6702
6703
/******************************************
 * Removes one level of indentation from an array of single-line strings.
 *
 * The shortest run of leading whitespace found on any line that has
 * non-whitespace content is removed from every such line, so the text is
 * uniformly outdented as much as possible. Lines that consist only of
 * whitespace are cut down to the part that $(D chomp) would strip from them
 * (their line terminator, if any).
 *
 * If a line with content but without any indentation is encountered, nothing
 * can be outdented and $(D lines) is returned right away; whitespace-only
 * lines located after that line are left as they are.
 *
 * Works with mutable, $(D const) and $(D immutable) character arrays.
 *
 * Params:
 *     lines = array of single-line strings
 *
 * Returns:
 *     $(D null) if $(D lines) is empty, otherwise $(D lines), whose elements
 *     have been rewritten in place with the outdented lines
 *
 * Throws:
 *     StringException if indentation is done with different sequences
 *     of whitespace characters.
 */
S[] outdent(S)(S[] lines) @safe pure
if (isSomeString!S)
{
    import std.algorithm.searching : startsWith;

    if (lines.empty)
    {
        return null;
    }

    // Leading whitespace of `str`, returned as a slice of `str` itself.
    static S leadingWhiteOf(S str)
    {
        return str[0 .. $ - stripLeft(str).length];
    }

    // First pass: blank out whitespace-only lines and find the shortest
    // indentation among the remaining ones.
    S shortestIndent;
    foreach (ref line; lines)
    {
        const stripped = line.stripLeft();

        if (stripped.empty)
        {
            // Keep only the line terminator (if there is one).
            line = line[line.chomp().length .. $];
        }
        else
        {
            // Deliberately typed as S rather than `const`: a const slice of a
            // mutable character array cannot be stored in `shortestIndent`,
            // which made this function uncompilable for char[] & co.
            S indent = leadingWhiteOf(line);

            // Comparing number of code units instead of code points is OK here
            // because this function throws upon inconsistent indentation.
            if (shortestIndent is null || indent.length < shortestIndent.length)
            {
                if (indent.empty)
                    return lines;
                shortestIndent = indent;
            }
        }
    }

    // Second pass: strip the common indentation from every line with content.
    foreach (ref line; lines)
    {
        const stripped = line.stripLeft();

        if (stripped.empty)
        {
            // Whitespace-only lines were already handled in the first pass.
        }
        else if (line.startsWith(shortestIndent))
        {
            line = line[shortestIndent.length .. $];
        }
        else
        {
            throw new StringException("outdent: Inconsistent indentation");
        }
    }

    return lines;
}
6779
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // NOTE(review): outdent_testStr and outdent_expected are not referenced
    // anywhere else in this unittest; testStr3/expected3 below spell out the
    // same literals again.
    // All multi-line literals in this block are whitespace-sensitive - do not
    // reformat them.
    template outdent_testStr(S)
    {
        enum S outdent_testStr =
"
\t\tX
\t\U00010143X
\t\t

\t\t\tX
\t ";
    }

    template outdent_expected(S)
    {
        enum S outdent_expected =
"
\tX
\U00010143X


\t\tX
";
    }

    assertCTFEable!(
    {

        // Each case is checked twice: `assert` at run time and `static assert`
        // during compilation.
        foreach (S; AliasSeq!(string, wstring, dstring))
        {
            // Empty input stays empty.
            enum S blank = "";
            assert(blank.outdent() == blank);
            static assert(blank.outdent() == blank);

            // Input made of whitespace-only lines: only the terminators remain.
            enum S testStr1 = " \n \t\n ";
            enum S expected1 = "\n\n";
            assert(testStr1.outdent() == expected1);
            static assert(testStr1.outdent() == expected1);

            // Same input without its last character.
            assert(testStr1[0..$-1].outdent() == expected1);
            static assert(testStr1[0..$-1].outdent() == expected1);

            // The first line has no indentation, so the text comes back
            // unchanged - including the whitespace-only line in the middle.
            enum S testStr2 = "a\n \t\nb";
            assert(testStr2.outdent() == testStr2);
            static assert(testStr2.outdent() == testStr2);

            // Tab indentation of varying depth, blank lines and a character
            // outside the BMP directly after the indentation.
            enum S testStr3 =
"
\t\tX
\t\U00010143X
\t\t

\t\t\tX
\t ";

            enum S expected3 =
"
\tX
\U00010143X


\t\tX
";
            assert(testStr3.outdent() == expected3);
            static assert(testStr3.outdent() == expected3);

            // NOTE(review): testStr4 through testStr7 (and their expected
            // values) are declared without the type S, so they are plain
            // string literals in every iteration of this loop.

            // Lines separated by \r, \n, \r\n, \u2028 and \u2029.
            enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X";
            enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X";
            assert(testStr4.outdent() == expected4);
            static assert(testStr4.outdent() == expected4);

            // Same as above with the final character cut off, which leaves a
            // whitespace-only last line.
            enum testStr5 = testStr4[0..$-1];
            enum expected5 = expected4[0..$-1];
            assert(testStr5.outdent() == expected5);
            static assert(testStr5.outdent() == expected5);

            // Whitespace-only lines with each kind of line terminator.
            enum testStr6 = " \r \n \r\n \u2028 \u2029";
            enum expected6 = "\r\n\r\n\u2028\u2029";
            assert(testStr6.outdent() == expected6);
            static assert(testStr6.outdent() == expected6);

            // Trailing whitespace after the content is kept.
            enum testStr7 = " a \n b ";
            enum expected7 = "a \nb ";
            assert(testStr7.outdent() == expected7);
            static assert(testStr7.outdent() == expected7);
        }
    });
}
6872
@safe pure unittest
{
    import std.exception : assertThrown;

    // The middle line is indented with a tab while its neighbours use a
    // space, which outdent reports as inconsistent indentation.
    string mixedIndent = " a\n\tb\n c";
    assertThrown!StringException(mixedIndent.outdent());
}
6879
/** Assume the given array of integers $(D arr) is a well-formed UTF string and
return it typed as a UTF string.

The element width selects the character type: $(D ubyte) and $(D byte) become
$(D char), $(D ushort) and $(D short) become $(D wchar), and $(D uint) and
$(D int) become $(D dchar). Type qualifiers are preserved. The array is
retyped with a cast; its elements are not converted individually.

When compiled with debug mode, this function performs an extra check to make
sure the return value is a valid Unicode string.

Params:
    arr = array of bytes, ubytes, shorts, ushorts, ints, or uints

Returns:
    arr retyped as an array of chars, wchars, or dchars

See_Also: $(LREF representation)
 */
auto assumeUTF(T)(T[] arr) pure
if (staticIndexOf!(Unqual!T, ubyte, ushort, uint, byte, short, int) != -1)
{
    import std.traits : CopyTypeQualifiers;
    import std.utf : validate;

    // An element size of 1, 2 or 4 bytes selects index 0, 1 or 2.
    alias ToUTFType(U) = AliasSeq!(char, wchar, dchar)[U.sizeof / 2];

    // Character type carrying the same qualifiers as T.
    alias UTFElement = CopyTypeQualifiers!(T, ToUTFType!(Unqual!T));

    auto asUTF = cast(UTFElement[]) arr;
    debug validate(asUTF);
    return asUTF;
}
6907
///
@safe pure unittest
{
    string text = "Hölo World";

    // Look at the string through its underlying code units ...
    immutable(ubyte)[] bytes = text.representation;

    // ... and retype those code units as a string again.
    string restored = bytes.assumeUTF;

    assert(restored == text);
}
6917
pure @system unittest
{
    import std.algorithm.comparison : equal;

    foreach (Str; AliasSeq!(char[], wchar[], dchar[]))
    {
        // Unsigned integer type as wide as one code unit of Str.
        static if (is(Str == char[]))
            alias Code = ubyte;
        else static if (is(Str == wchar[]))
            alias Code = ushort;
        else static if (is(Str == dchar[]))
            alias Code = uint;

        immutable Str source = "Hello World";
        Str text = source.dup;

        // The same data viewed as mutable, const and immutable integers.
        auto raw = cast(Code[]) text;
        auto rawConst = cast(const(Code)[]) text;
        auto rawImmutable = cast(immutable(Code)[]) text;

        // Whatever the qualifier, assumeUTF has to yield the original text.
        assert(equal(text, assumeUTF(raw)));
        assert(equal(text, assumeUTF(rawConst)));
        assert(equal(text, assumeUTF(rawImmutable)));
    }
}