]>
Commit | Line | Data |
---|---|---|
7a938933 ILT |
1 | // Copyright 2009 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
9ff56c95 ILT |
5 | // Package bytes implements functions for the manipulation of byte slices. |
6 | // It is analogous to the facilities of the strings package. | |
7a938933 ILT |
7 | package bytes |
8 | ||
9 | import ( | |
10 | "unicode" | |
9c63abc9 | 11 | "unicode/utf8" |
7a938933 ILT |
12 | ) |
13 | ||
// Compare orders two byte slices lexicographically and reports the outcome
// as an integer: 0 if a == b, -1 if a < b, and +1 if a > b.
// A nil argument is equivalent to an empty slice.
func Compare(a, b []byte) int {
	shared := len(b)
	if len(a) < shared {
		shared = len(a)
	}
	// The first differing byte inside the common prefix decides the order.
	for i := 0; i < shared; i++ {
		if a[i] == b[i] {
			continue
		}
		if a[i] < b[i] {
			return -1
		}
		return 1
	}
	// The common prefix is identical, so the shorter slice sorts first.
	if len(a) < len(b) {
		return -1
	}
	if len(a) > len(b) {
		return 1
	}
	return 0
}
39 | ||
// Equal returns a boolean reporting whether a == b.
// A nil argument is equivalent to an empty slice.
//
// The declaration has no body in this file.
// NOTE(review): the implementation is presumably supplied outside this file
// (for example in assembly or runtime support code) — confirm.
func Equal(a, b []byte) bool
43 | ||
// equalPortable reports whether a and b have the same length and hold the
// same bytes, using only plain Go code.
func equalPortable(a, b []byte) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
55 | ||
// explode splits s into a slice of UTF-8 sequences, one per Unicode character
// (each still a slice of bytes), up to a maximum of n byte slices. If n <= 0
// there is no limit. Invalid UTF-8 sequences are chopped into individual bytes.
// When the limit is reached, the final element holds the unsplit remainder.
func explode(s []byte, n int) [][]byte {
	// s can never yield more than len(s) pieces, so cap n there. Without the
	// cap a large caller-supplied n would allocate a needlessly huge result.
	if n <= 0 || n > len(s) {
		n = len(s)
	}
	a := make([][]byte, n)
	na := 0
	for len(s) > 0 {
		if na+1 >= n {
			// Last available slot: it receives everything that is left.
			a[na] = s
			na++
			break
		}
		_, size := utf8.DecodeRune(s)
		a[na] = s[0:size]
		s = s[size:]
		na++
	}
	return a[0:na]
}
78 | ||
79 | // Count counts the number of non-overlapping instances of sep in s. | |
80 | func Count(s, sep []byte) int { | |
9a0e3259 ILT |
81 | n := len(sep) |
82 | if n == 0 { | |
7a938933 ILT |
83 | return utf8.RuneCount(s) + 1 |
84 | } | |
9a0e3259 ILT |
85 | if n > len(s) { |
86 | return 0 | |
87 | } | |
88 | count := 0 | |
7a938933 | 89 | c := sep[0] |
9a0e3259 ILT |
90 | i := 0 |
91 | t := s[:len(s)-n+1] | |
92 | for i < len(t) { | |
93 | if t[i] != c { | |
94 | o := IndexByte(t[i:], c) | |
95 | if o < 0 { | |
96 | break | |
97 | } | |
98 | i += o | |
7a938933 | 99 | } |
9a0e3259 ILT |
100 | if n == 1 || Equal(s[i:i+n], sep) { |
101 | count++ | |
102 | i += n | |
103 | continue | |
104 | } | |
105 | i++ | |
7a938933 | 106 | } |
9a0e3259 | 107 | return count |
7a938933 ILT |
108 | } |
109 | ||
9c63abc9 ILT |
110 | // Contains returns whether subslice is within b. |
111 | func Contains(b, subslice []byte) bool { | |
112 | return Index(b, subslice) != -1 | |
113 | } | |
114 | ||
7a938933 ILT |
115 | // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. |
116 | func Index(s, sep []byte) int { | |
117 | n := len(sep) | |
118 | if n == 0 { | |
119 | return 0 | |
120 | } | |
9a0e3259 ILT |
121 | if n > len(s) { |
122 | return -1 | |
123 | } | |
7a938933 | 124 | c := sep[0] |
9a0e3259 ILT |
125 | if n == 1 { |
126 | return IndexByte(s, c) | |
127 | } | |
128 | i := 0 | |
129 | t := s[:len(s)-n+1] | |
130 | for i < len(t) { | |
131 | if t[i] != c { | |
132 | o := IndexByte(t[i:], c) | |
133 | if o < 0 { | |
134 | break | |
135 | } | |
136 | i += o | |
137 | } | |
138 | if Equal(s[i:i+n], sep) { | |
7a938933 ILT |
139 | return i |
140 | } | |
9a0e3259 | 141 | i++ |
7a938933 ILT |
142 | } |
143 | return -1 | |
144 | } | |
145 | ||
// indexBytePortable returns the index of the first byte in s equal to c,
// or -1 if no byte matches. It uses only plain Go code.
func indexBytePortable(s []byte, c byte) int {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return i
		}
	}
	return -1
}
154 | ||
155 | // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. | |
156 | func LastIndex(s, sep []byte) int { | |
157 | n := len(sep) | |
158 | if n == 0 { | |
159 | return len(s) | |
160 | } | |
161 | c := sep[0] | |
162 | for i := len(s) - n; i >= 0; i-- { | |
163 | if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { | |
164 | return i | |
165 | } | |
166 | } | |
167 | return -1 | |
168 | } | |
169 | ||
// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of the given rune,
// or -1 if the rune is not present in s.
func IndexRune(s []byte, r rune) int {
	pos := 0
	for pos < len(s) {
		got, width := utf8.DecodeRune(s[pos:])
		if got == r {
			return pos
		}
		pos += width
	}
	return -1
}
183 | ||
// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of any of the Unicode
// code points in chars. It returns -1 if chars is empty or if there is no code
// point in common.
func IndexAny(s []byte, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	for pos := 0; pos < len(s); {
		// Single bytes below RuneSelf are complete runes; skip the decoder.
		cur, width := rune(s[pos]), 1
		if cur >= utf8.RuneSelf {
			cur, width = utf8.DecodeRune(s[pos:])
		}
		for _, want := range chars {
			if want == cur {
				return pos
			}
		}
		pos += width
	}
	return -1
}
208 | ||
ff5f50c5 ILT |
// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
// points. It returns the byte index of the last occurrence in s of any of
// the Unicode code points in chars. It returns -1 if chars is empty or if
// there is no code point in common.
func LastIndexAny(s []byte, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	end := len(s)
	for end > 0 {
		// Decode the rune ending at end, then step back onto its first byte.
		cur, width := utf8.DecodeLastRune(s[:end])
		end -= width
		for _, want := range chars {
			if want == cur {
				return end
			}
		}
	}
	return -1
}
227 | ||
7a938933 ILT |
228 | // Generic split: splits after each instance of sep, |
229 | // including sepSave bytes of sep in the subarrays. | |
230 | func genSplit(s, sep []byte, sepSave, n int) [][]byte { | |
231 | if n == 0 { | |
232 | return nil | |
233 | } | |
234 | if len(sep) == 0 { | |
235 | return explode(s, n) | |
236 | } | |
237 | if n < 0 { | |
238 | n = Count(s, sep) + 1 | |
239 | } | |
240 | c := sep[0] | |
241 | start := 0 | |
242 | a := make([][]byte, n) | |
243 | na := 0 | |
244 | for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { | |
245 | if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) { | |
246 | a[na] = s[start : i+sepSave] | |
247 | na++ | |
248 | start = i + len(sep) | |
249 | i += len(sep) - 1 | |
250 | } | |
251 | } | |
252 | a[na] = s[start:] | |
253 | return a[0 : na+1] | |
254 | } | |
255 | ||
adb0401d | 256 | // SplitN slices s into subslices separated by sep and returns a slice of |
7a938933 | 257 | // the subslices between those separators. |
adb0401d | 258 | // If sep is empty, SplitN splits after each UTF-8 sequence. |
7a938933 ILT |
259 | // The count determines the number of subslices to return: |
260 | // n > 0: at most n subslices; the last subslice will be the unsplit remainder. | |
261 | // n == 0: the result is nil (zero subslices) | |
262 | // n < 0: all subslices | |
adb0401d | 263 | func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } |
7a938933 | 264 | |
adb0401d | 265 | // SplitAfterN slices s into subslices after each instance of sep and |
7a938933 | 266 | // returns a slice of those subslices. |
adb0401d | 267 | // If sep is empty, SplitAfterN splits after each UTF-8 sequence. |
7a938933 ILT |
268 | // The count determines the number of subslices to return: |
269 | // n > 0: at most n subslices; the last subslice will be the unsplit remainder. | |
270 | // n == 0: the result is nil (zero subslices) | |
271 | // n < 0: all subslices | |
adb0401d | 272 | func SplitAfterN(s, sep []byte, n int) [][]byte { |
7a938933 ILT |
273 | return genSplit(s, sep, len(sep), n) |
274 | } | |
275 | ||
adb0401d ILT |
276 | // Split slices s into all subslices separated by sep and returns a slice of |
277 | // the subslices between those separators. | |
278 | // If sep is empty, Split splits after each UTF-8 sequence. | |
279 | // It is equivalent to SplitN with a count of -1. | |
280 | func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } | |
281 | ||
282 | // SplitAfter slices s into all subslices after each instance of sep and | |
283 | // returns a slice of those subslices. | |
284 | // If sep is empty, SplitAfter splits after each UTF-8 sequence. | |
285 | // It is equivalent to SplitAfterN with a count of -1. | |
286 | func SplitAfter(s, sep []byte) [][]byte { | |
287 | return genSplit(s, sep, len(sep), -1) | |
288 | } | |
289 | ||
7a938933 ILT |
290 | // Fields splits the array s around each instance of one or more consecutive white space |
291 | // characters, returning a slice of subarrays of s or an empty list if s contains only white space. | |
292 | func Fields(s []byte) [][]byte { | |
293 | return FieldsFunc(s, unicode.IsSpace) | |
294 | } | |
295 | ||
// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
// It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If all code points in s satisfy f(c),
// or len(s) == 0, an empty slice is returned.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
	// First pass: count the fields so the result is allocated exactly once.
	n := 0
	inField := false
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRune(s[i:])
		wasInField := inField
		inField = !f(r)
		if inField && !wasInField {
			n++
		}
		i += size
	}

	// Second pass: record where each field starts and ends. The scan stops
	// as soon as all n fields are collected, so trailing separators are not
	// examined again.
	a := make([][]byte, 0, n)
	fieldStart := -1 // start offset of the open field, or -1 when between fields
	for i := 0; i < len(s) && len(a) < n; {
		r, size := utf8.DecodeRune(s[i:])
		if f(r) {
			if fieldStart >= 0 {
				a = append(a, s[fieldStart:i])
				fieldStart = -1
			}
		} else if fieldStart < 0 {
			fieldStart = i
		}
		i += size
	}
	// A field still open at the end of s runs to the end of the input.
	if fieldStart >= 0 {
		a = append(a, s[fieldStart:])
	}
	return a
}
335 | ||
// Join concatenates the elements of a to create a new byte slice. The
// separator sep is placed between elements in the result.
// The returned slice never shares storage with the elements of a.
func Join(a [][]byte, sep []byte) []byte {
	switch len(a) {
	case 0:
		return []byte{}
	case 1:
		// No separator is needed; just return a copy.
		return append([]byte(nil), a[0]...)
	}
	// Size the output exactly: every element plus one separator per gap.
	total := len(sep) * (len(a) - 1)
	for _, part := range a {
		total += len(part)
	}

	out := make([]byte, total)
	w := 0
	for i, part := range a {
		if i > 0 {
			w += copy(out[w:], sep)
		}
		w += copy(out[w:], part)
	}
	return out
}
359 | ||
360 | // HasPrefix tests whether the byte array s begins with prefix. | |
361 | func HasPrefix(s, prefix []byte) bool { | |
362 | return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) | |
363 | } | |
364 | ||
365 | // HasSuffix tests whether the byte array s ends with suffix. | |
366 | func HasSuffix(s, suffix []byte) bool { | |
367 | return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) | |
368 | } | |
369 | ||
// Map returns a copy of the byte slice s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the byte slice with no replacement. The characters
// in s and the output are interpreted as UTF-8-encoded Unicode code points.
//
// mapping is called once per code point of s, in order. A mapped value that
// is not a valid rune is written as the encoding of utf8.RuneError.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// The output usually has the same size as the input; append grows the
	// buffer in the rare case that the mapped encoding is longer.
	b := make([]byte, 0, len(s))
	// Encode into a scratch array first. EncodeRune emits the three-byte
	// RuneError for invalid runes, for which utf8.RuneLen reports -1, so
	// sizing the destination from RuneLen could overrun it.
	var enc [utf8.UTFMax]byte
	for i := 0; i < len(s); {
		wid := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[i:])
		}
		i += wid
		r = mapping(r)
		if r < 0 {
			// Negative result: drop the character.
			continue
		}
		n := utf8.EncodeRune(enc[:], r)
		b = append(b, enc[:n]...)
	}
	return b
}
402 | ||
// Repeat returns a new byte slice consisting of count copies of b.
//
// It panics if count is negative or if the result of len(b) * count
// overflows.
func Repeat(b []byte, count int) []byte {
	// Fail with a clear message instead of an incidental allocation or
	// index panic further down.
	if count < 0 {
		panic("bytes: negative Repeat count")
	}
	if count > 0 && len(b)*count/count != len(b) {
		panic("bytes: Repeat count causes overflow")
	}

	nb := make([]byte, len(b)*count)
	// Seed the output with one copy of b, then repeatedly double the filled
	// prefix; copy clamps the final step to the remaining space.
	bp := copy(nb, b)
	for bp < len(nb) {
		copy(nb[bp:], nb[:bp])
		bp *= 2
	}
	return nb
}
415 | ||
416 | // ToUpper returns a copy of the byte array s with all Unicode letters mapped to their upper case. | |
417 | func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } | |
418 | ||
bd2e46c8 | 419 | // ToLower returns a copy of the byte array s with all Unicode letters mapped to their lower case. |
7a938933 ILT |
420 | func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } |
421 | ||
422 | // ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case. | |
423 | func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } | |
424 | ||
425 | // ToUpperSpecial returns a copy of the byte array s with all Unicode letters mapped to their | |
426 | // upper case, giving priority to the special casing rules. | |
427 | func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { | |
506cf9aa | 428 | return Map(func(r rune) rune { return _case.ToUpper(r) }, s) |
7a938933 ILT |
429 | } |
430 | ||
431 | // ToLowerSpecial returns a copy of the byte array s with all Unicode letters mapped to their | |
432 | // lower case, giving priority to the special casing rules. | |
433 | func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { | |
506cf9aa | 434 | return Map(func(r rune) rune { return _case.ToLower(r) }, s) |
7a938933 ILT |
435 | } |
436 | ||
437 | // ToTitleSpecial returns a copy of the byte array s with all Unicode letters mapped to their | |
438 | // title case, giving priority to the special casing rules. | |
439 | func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { | |
506cf9aa | 440 | return Map(func(r rune) rune { return _case.ToTitle(r) }, s) |
7a938933 ILT |
441 | } |
442 | ||
7a938933 ILT |
// isSeparator reports whether the rune could mark a word boundary.
// TODO: update when package unicode captures more of the properties.
func isSeparator(r rune) bool {
	if r <= 0x7F {
		// In the ASCII range only alphanumerics and underscore are word
		// characters; everything else separates words.
		wordChar := r == '_' ||
			('0' <= r && r <= '9') ||
			('a' <= r && r <= 'z') ||
			('A' <= r && r <= 'Z')
		return !wordChar
	}
	// Outside ASCII, letters and digits are never separators.
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return false
	}
	// Otherwise, all we can do for now is treat spaces as separators.
	return unicode.IsSpace(r)
}
467 | ||
468 | // BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly. | |
469 | ||
470 | // Title returns a copy of s with all Unicode letters that begin words | |
471 | // mapped to their title case. | |
472 | func Title(s []byte) []byte { | |
473 | // Use a closure here to remember state. | |
474 | // Hackish but effective. Depends on Map scanning in order and calling | |
475 | // the closure once per rune. | |
9a0e3259 | 476 | prev := ' ' |
7a938933 | 477 | return Map( |
506cf9aa | 478 | func(r rune) rune { |
7a938933 ILT |
479 | if isSeparator(prev) { |
480 | prev = r | |
481 | return unicode.ToTitle(r) | |
482 | } | |
483 | prev = r | |
484 | return r | |
485 | }, | |
486 | s) | |
487 | } | |
488 | ||
489 | // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded | |
490 | // Unicode code points c that satisfy f(c). | |
506cf9aa | 491 | func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { |
7a938933 ILT |
492 | i := indexFunc(s, f, false) |
493 | if i == -1 { | |
494 | return nil | |
495 | } | |
496 | return s[i:] | |
497 | } | |
498 | ||
499 | // TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 | |
500 | // encoded Unicode code points c that satisfy f(c). | |
506cf9aa | 501 | func TrimRightFunc(s []byte, f func(r rune) bool) []byte { |
7a938933 ILT |
502 | i := lastIndexFunc(s, f, false) |
503 | if i >= 0 && s[i] >= utf8.RuneSelf { | |
504 | _, wid := utf8.DecodeRune(s[i:]) | |
505 | i += wid | |
506 | } else { | |
507 | i++ | |
508 | } | |
509 | return s[0:i] | |
510 | } | |
511 | ||
512 | // TrimFunc returns a subslice of s by slicing off all leading and trailing | |
513 | // UTF-8-encoded Unicode code points c that satisfy f(c). | |
506cf9aa | 514 | func TrimFunc(s []byte, f func(r rune) bool) []byte { |
7a938933 ILT |
515 | return TrimRightFunc(TrimLeftFunc(s, f), f) |
516 | } | |
517 | ||
518 | // IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. | |
519 | // It returns the byte index in s of the first Unicode | |
520 | // code point satisfying f(c), or -1 if none do. | |
506cf9aa | 521 | func IndexFunc(s []byte, f func(r rune) bool) int { |
7a938933 ILT |
522 | return indexFunc(s, f, true) |
523 | } | |
524 | ||
525 | // LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. | |
526 | // It returns the byte index in s of the last Unicode | |
527 | // code point satisfying f(c), or -1 if none do. | |
506cf9aa | 528 | func LastIndexFunc(s []byte, f func(r rune) bool) int { |
7a938933 ILT |
529 | return lastIndexFunc(s, f, true) |
530 | } | |
531 | ||
// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted. It returns the byte index of the first code point r in s for
// which f(r) == truth, or -1 if there is none.
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
	for pos := 0; pos < len(s); {
		// Single bytes below RuneSelf are complete runes; skip the decoder.
		r, wid := rune(s[pos]), 1
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[pos:])
		}
		if f(r) == truth {
			return pos
		}
		pos += wid
	}
	return -1
}
550 | ||
// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted. It returns the byte index of the last code point r in s for
// which f(r) == truth, or -1 if there is none.
func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
	end := len(s)
	for end > 0 {
		// Decode the rune ending at end, then step back onto its first byte.
		r, size := utf8.DecodeLastRune(s[:end])
		end -= size
		if f(r) == truth {
			return end
		}
	}
	return -1
}
564 | ||
506cf9aa ILT |
// makeCutsetFunc returns a predicate that reports whether its argument is
// one of the code points obtained by ranging over cutset.
func makeCutsetFunc(cutset string) func(r rune) bool {
	inCutset := func(r rune) bool {
		for _, member := range cutset {
			if member == r {
				return true
			}
		}
		return false
	}
	return inCutset
}
575 | ||
576 | // Trim returns a subslice of s by slicing off all leading and | |
577 | // trailing UTF-8-encoded Unicode code points contained in cutset. | |
578 | func Trim(s []byte, cutset string) []byte { | |
579 | return TrimFunc(s, makeCutsetFunc(cutset)) | |
580 | } | |
581 | ||
582 | // TrimLeft returns a subslice of s by slicing off all leading | |
583 | // UTF-8-encoded Unicode code points contained in cutset. | |
584 | func TrimLeft(s []byte, cutset string) []byte { | |
585 | return TrimLeftFunc(s, makeCutsetFunc(cutset)) | |
586 | } | |
587 | ||
588 | // TrimRight returns a subslice of s by slicing off all trailing | |
589 | // UTF-8-encoded Unicode code points that are contained in cutset. | |
590 | func TrimRight(s []byte, cutset string) []byte { | |
591 | return TrimRightFunc(s, makeCutsetFunc(cutset)) | |
592 | } | |
593 | ||
594 | // TrimSpace returns a subslice of s by slicing off all leading and | |
ff5f50c5 | 595 | // trailing white space, as defined by Unicode. |
7a938933 ILT |
596 | func TrimSpace(s []byte) []byte { |
597 | return TrimFunc(s, unicode.IsSpace) | |
598 | } | |
599 | ||
// Runes returns a slice of runes (Unicode code points) equivalent to s.
func Runes(s []byte) []rune {
	// RuneCount sizes the result exactly, one slot per decoded rune.
	out := make([]rune, utf8.RuneCount(s))
	for i, pos := 0, 0; pos < len(s); i++ {
		r, width := utf8.DecodeRune(s[pos:])
		out[i] = r
		pos += width
	}
	return out
}
612 | ||
613 | // Replace returns a copy of the slice s with the first n | |
614 | // non-overlapping instances of old replaced by new. | |
615 | // If n < 0, there is no limit on the number of replacements. | |
616 | func Replace(s, old, new []byte, n int) []byte { | |
d8f41257 ILT |
617 | m := 0 |
618 | if n != 0 { | |
619 | // Compute number of replacements. | |
620 | m = Count(s, old) | |
621 | } | |
622 | if m == 0 { | |
4ccad563 ILT |
623 | // Just return a copy. |
624 | return append([]byte(nil), s...) | |
d8f41257 ILT |
625 | } |
626 | if n < 0 || m < n { | |
7a938933 ILT |
627 | n = m |
628 | } | |
629 | ||
630 | // Apply replacements to buffer. | |
631 | t := make([]byte, len(s)+n*(len(new)-len(old))) | |
632 | w := 0 | |
633 | start := 0 | |
634 | for i := 0; i < n; i++ { | |
635 | j := start | |
636 | if len(old) == 0 { | |
637 | if i > 0 { | |
638 | _, wid := utf8.DecodeRune(s[start:]) | |
639 | j += wid | |
640 | } | |
641 | } else { | |
642 | j += Index(s[start:], old) | |
643 | } | |
644 | w += copy(t[w:], s[start:j]) | |
645 | w += copy(t[w:], new) | |
646 | start = j + len(old) | |
647 | } | |
648 | w += copy(t[w:], s[start:]) | |
649 | return t[0:w] | |
650 | } | |
d8f41257 ILT |
651 | |
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t []byte) bool {
	for len(s) > 0 && len(t) > 0 {
		// Peel the leading rune off each slice; single bytes below RuneSelf
		// are complete runes and skip the decoder.
		sr, sw := rune(s[0]), 1
		if sr >= utf8.RuneSelf {
			sr, sw = utf8.DecodeRune(s)
		}
		s = s[sw:]

		tr, tw := rune(t[0]), 1
		if tr >= utf8.RuneSelf {
			tr, tw = utf8.DecodeRune(t)
		}
		t = t[tw:]

		// Identical runes match trivially.
		if sr == tr {
			continue
		}

		// Order the pair so that lo < hi, which simplifies what follows.
		lo, hi := sr, tr
		if hi < lo {
			lo, hi = hi, lo
		}

		// ASCII fast path: lo is upper case, so hi must be its lower case.
		if hi < utf8.RuneSelf && 'A' <= lo && lo <= 'Z' {
			if hi != lo+'a'-'A' {
				return false
			}
			continue
		}

		// General case. SimpleFold(x) returns the next equivalent rune > x
		// or wraps around to smaller values, so walk the fold orbit of lo
		// until it reaches hi, passes it, or comes back around.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// At least one input is exhausted; they are equal only if both are.
	return len(s) == len(t)
}