]> git.ipfire.org Git - thirdparty/ccache.git/blob - src/util/string.cpp
refactor: Remove superfluous util:: qualifications
[thirdparty/ccache.git] / src / util / string.cpp
1 // Copyright (C) 2021-2023 Joel Rosdahl and other contributors
2 //
3 // See doc/AUTHORS.adoc for a complete list of contributors.
4 //
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the Free
7 // Software Foundation; either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 // more details.
14 //
15 // You should have received a copy of the GNU General Public License along with
16 // this program; if not, write to the Free Software Foundation, Inc., 51
17 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 #include "string.hpp"
20
21 #include <assertions.hpp>
22 #include <fmtmacros.hpp>
23
24 #include <algorithm>
25 #include <cctype>
26 #include <iostream>
27
28 namespace {
29
30 template<typename T>
31 std::vector<T>
32 split_into(std::string_view string,
33 const char* separators,
34 util::Tokenizer::Mode mode,
35 util::Tokenizer::IncludeDelimiter include_delimiter)
36
37 {
38 std::vector<T> result;
39 for (const auto token :
40 util::Tokenizer(string, separators, mode, include_delimiter)) {
41 result.emplace_back(token);
42 }
43 return result;
44 }
45
46 } // namespace
47
48 namespace util {
49
50 std::string
51 format_argv_for_logging(const char* const* argv)
52 {
53 std::string result;
54 for (size_t i = 0; argv[i]; ++i) {
55 if (i != 0) {
56 result += ' ';
57 }
58 std::string arg = replace_all(argv[i], "\\", "\\\\");
59 arg = replace_all(arg, "\"", "\\\"");
60 if (arg.empty() || arg.find(' ') != std::string::npos) {
61 arg = FMT("\"{}\"", arg);
62 }
63 result += arg;
64 }
65 return result;
66 }
67
68 std::string
69 format_base16(nonstd::span<const uint8_t> data)
70 {
71 static const char digits[] = "0123456789abcdef";
72 std::string result;
73 result.reserve(2 * data.size());
74 for (uint8_t b : data) {
75 result += digits[b >> 4];
76 result += digits[b & 0xF];
77 }
78 return result;
79 }
80
81 std::string
82 format_base32hex(nonstd::span<const uint8_t> data)
83 {
84 static const char digits[] = "0123456789abcdefghijklmnopqrstuv";
85 std::string result;
86 result.reserve(data.size() * 8 / 5 + 1);
87 uint8_t i = 0;
88 uint16_t bits = 0;
89 for (uint8_t b : data) {
90 bits <<= 8;
91 bits |= b;
92 i += 8;
93 while (i >= 5) {
94 result += digits[(bits >> (i - 5)) & 0x1f];
95 i -= 5;
96 }
97 }
98 if (i > 0) {
99 DEBUG_ASSERT(i < 5);
100 result += digits[(bits << (5 - i)) & 0x1f];
101 }
102 return result;
103 }
104
105 std::string
106 format_digest(nonstd::span<const uint8_t> data)
107 {
108 const size_t base16_bytes = 2;
109 ASSERT(data.size() >= base16_bytes);
110 return format_base16({data.data(), base16_bytes})
111 + format_base32hex(
112 {data.data() + base16_bytes, data.size() - base16_bytes});
113 }
114
115 std::string
116 format_human_readable_diff(int64_t diff, SizeUnitPrefixType prefix_type)
117 {
118 const char* sign = diff == 0 ? "" : (diff > 0 ? "+" : "-");
119 return FMT(
120 "{}{}", sign, format_human_readable_size(std::abs(diff), prefix_type));
121 }
122
123 std::string
124 format_human_readable_size(uint64_t size, SizeUnitPrefixType prefix_type)
125 {
126 const double factor = prefix_type == SizeUnitPrefixType::binary ? 1024 : 1000;
127 const char* infix = prefix_type == SizeUnitPrefixType::binary ? "i" : "";
128 if (size >= factor * factor * factor) {
129 return FMT("{:.1f} G{}B", size / (factor * factor * factor), infix);
130 } else if (size >= factor * factor) {
131 return FMT("{:.1f} M{}B", size / (factor * factor), infix);
132 } else if (size >= factor) {
133 const char* k = prefix_type == SizeUnitPrefixType::binary ? "K" : "k";
134 return FMT("{:.1f} {}{}B", size / factor, k, infix);
135 } else if (size == 1) {
136 return "1 byte";
137 } else {
138 return FMT("{} bytes", size);
139 }
140 }
141
142 tl::expected<double, std::string>
143 parse_double(const std::string& value)
144 {
145 size_t end;
146 double result;
147 bool failed = false;
148 try {
149 result = std::stod(value, &end);
150 } catch (const std::exception&) {
151 failed = true;
152 }
153
154 if (failed || end != value.size()) {
155 return tl::unexpected(FMT("invalid floating point: \"{}\"", value));
156 } else {
157 return result;
158 }
159 }
160
161 tl::expected<uint64_t, std::string>
162 parse_duration(std::string_view duration)
163 {
164 uint64_t factor = 0;
165 char last_ch = duration.empty() ? '\0' : duration[duration.length() - 1];
166
167 switch (last_ch) {
168 case 'd':
169 factor = 24 * 60 * 60;
170 break;
171 case 's':
172 factor = 1;
173 break;
174 default:
175 return tl::unexpected(FMT(
176 "invalid suffix (supported: d (day) and s (second)): \"{}\"", duration));
177 }
178
179 auto value = parse_unsigned(duration.substr(0, duration.length() - 1));
180 if (!value) {
181 return value;
182 };
183 return factor * *value;
184 }
185
186 tl::expected<int64_t, std::string>
187 parse_signed(std::string_view value,
188 const std::optional<int64_t> min_value,
189 const std::optional<int64_t> max_value,
190 const std::string_view description)
191 {
192 const std::string stripped_value = strip_whitespace(value);
193
194 size_t end = 0;
195 long long result = 0;
196 bool failed = false;
197 try {
198 // Note: sizeof(long long) is guaranteed to be >= sizeof(int64_t)
199 result = std::stoll(stripped_value, &end, 10);
200 } catch (std::exception&) {
201 failed = true;
202 }
203 if (failed || end != stripped_value.size()) {
204 return tl::unexpected(FMT("invalid integer: \"{}\"", stripped_value));
205 }
206
207 const int64_t min = min_value ? *min_value : INT64_MIN;
208 const int64_t max = max_value ? *max_value : INT64_MAX;
209 if (result < min || result > max) {
210 return tl::unexpected(
211 FMT("{} must be between {} and {}", description, min, max));
212 } else {
213 return result;
214 }
215 }
216
217 tl::expected<std::pair<uint64_t, SizeUnitPrefixType>, std::string>
218 parse_size(const std::string& value)
219 {
220 errno = 0;
221
222 char* p;
223 double result = strtod(value.c_str(), &p);
224 if (errno != 0 || result < 0 || p == value.c_str() || value.empty()) {
225 return tl::unexpected(FMT("invalid size: \"{}\"", value));
226 }
227
228 while (isspace(*p)) {
229 ++p;
230 }
231
232 SizeUnitPrefixType prefix_type;
233 if (*p != '\0') {
234 prefix_type = *(p + 1) == 'i' ? SizeUnitPrefixType::binary
235 : SizeUnitPrefixType::decimal;
236 unsigned multiplier =
237 prefix_type == SizeUnitPrefixType::binary ? 1024 : 1000;
238 switch (*p) {
239 case 'T':
240 result *= multiplier;
241 [[fallthrough]];
242 case 'G':
243 result *= multiplier;
244 [[fallthrough]];
245 case 'M':
246 result *= multiplier;
247 [[fallthrough]];
248 case 'K':
249 case 'k':
250 result *= multiplier;
251 break;
252 default:
253 return tl::unexpected(FMT("invalid size: \"{}\"", value));
254 }
255 } else {
256 result *= 1024 * 1024 * 1024;
257 prefix_type = SizeUnitPrefixType::binary;
258 }
259
260 return std::make_pair(static_cast<uint64_t>(result), prefix_type);
261 }
262
263 tl::expected<mode_t, std::string>
264 parse_umask(std::string_view value)
265 {
266 return parse_unsigned(value, 0, 0777, "umask", 8);
267 }
268
269 tl::expected<uint64_t, std::string>
270 parse_unsigned(std::string_view value,
271 const std::optional<uint64_t> min_value,
272 const std::optional<uint64_t> max_value,
273 const std::string_view description,
274 const int base)
275 {
276 const std::string stripped_value = strip_whitespace(value);
277
278 size_t end = 0;
279 unsigned long long result = 0;
280 bool failed = false;
281 if (starts_with(stripped_value, "-")) {
282 failed = true;
283 } else {
284 try {
285 // Note: sizeof(unsigned long long) is guaranteed to be >=
286 // sizeof(uint64_t)
287 result = std::stoull(stripped_value, &end, base);
288 } catch (std::exception&) {
289 failed = true;
290 }
291 }
292 if (failed || end != stripped_value.size()) {
293 const auto base_info = base == 8 ? "octal " : "";
294 return tl::unexpected(
295 FMT("invalid unsigned {}integer: \"{}\"", base_info, stripped_value));
296 }
297
298 const uint64_t min = min_value ? *min_value : 0;
299 const uint64_t max = max_value ? *max_value : UINT64_MAX;
300 if (result < min || result > max) {
301 return tl::unexpected(
302 FMT("{} must be between {} and {}", description, min, max));
303 } else {
304 return result;
305 }
306 }
307
308 tl::expected<std::string, std::string>
309 percent_decode(std::string_view string)
310 {
311 const auto from_hex = [](const char digit) {
312 return static_cast<uint8_t>(
313 std::isdigit(digit) ? digit - '0' : std::tolower(digit) - 'a' + 10);
314 };
315
316 std::string result;
317 result.reserve(string.size());
318 size_t i = 0;
319 while (i < string.size()) {
320 if (string[i] != '%') {
321 result += string[i];
322 } else if (i + 2 >= string.size() || !std::isxdigit(string[i + 1])
323 || !std::isxdigit(string[i + 2])) {
324 return tl::unexpected(
325 FMT("invalid percent-encoded string at position {}: {}", i, string));
326 } else {
327 const char ch = static_cast<char>(from_hex(string[i + 1]) << 4
328 | from_hex(string[i + 2]));
329 result += ch;
330 i += 2;
331 }
332 ++i;
333 }
334
335 return result;
336 }
337
// Return a copy of `string` in which every non-overlapping occurrence of
// `from` is replaced with `to`, scanning from left to right. If `from` is
// empty, `string` is returned unchanged.
std::string
replace_all(const std::string_view string,
            const std::string_view from,
            const std::string_view to)
{
  if (from.empty()) {
    return std::string(string);
  }

  std::string result;
  size_t left = 0;
  while (left < string.size()) {
    const size_t right = string.find(from, left);
    if (right == std::string_view::npos) {
      // Append the tail with an explicit length: a string_view is not
      // guaranteed to be NUL-terminated, so append(const char*) could read
      // past the end of the view.
      result.append(string.data() + left, string.size() - left);
      break;
    }
    result.append(string.data() + left, right - left);
    result.append(to.data(), to.size());
    left = right + from.size();
  }
  return result;
}
362
// Return a copy of `string` in which the first occurrence of `from` is
// replaced with `to`. If `from` is empty or does not occur, `string` is
// returned unchanged.
std::string
replace_first(const std::string_view string,
              const std::string_view from,
              const std::string_view to)
{
  if (from.empty()) {
    return std::string(string);
  }

  const auto pos = string.find(from);
  if (pos == std::string_view::npos) {
    return std::string(string);
  }

  const size_t tail_start = pos + from.size();

  std::string result;
  result.reserve(string.size() - from.size() + to.size());
  result.append(string.data(), pos);
  result.append(to.data(), to.size());
  // Append the tail with an explicit length: a string_view is not guaranteed
  // to be NUL-terminated, so append(const char*) could read past the end of
  // the view.
  result.append(string.data() + tail_start, string.size() - tail_start);
  return result;
}
383
384 std::vector<std::string>
385 split_into_strings(std::string_view string,
386 const char* separators,
387 Tokenizer::Mode mode,
388 Tokenizer::IncludeDelimiter include_delimiter)
389 {
390 return split_into<std::string>(string, separators, mode, include_delimiter);
391 }
392
393 std::vector<std::string_view>
394 split_into_views(std::string_view string,
395 const char* separators,
396 Tokenizer::Mode mode,
397 Tokenizer::IncludeDelimiter include_delimiter)
398 {
399 return split_into<std::string_view>(
400 string, separators, mode, include_delimiter);
401 }
402
403 std::pair<std::string_view, std::optional<std::string_view>>
404 split_once(const char* string, const char split_char)
405 {
406 return split_once(std::string_view(string), split_char);
407 }
408
409 std::pair<std::string, std::optional<std::string>>
410 split_once(std::string&& string, const char split_char)
411 {
412 const auto [left, right] = split_once(std::string_view(string), split_char);
413 if (right) {
414 return std::make_pair(std::string(left), std::string(*right));
415 } else {
416 return std::make_pair(std::string(left), std::nullopt);
417 }
418 }
419
// Split `string` at the first occurrence of `split_char`.
//
// Returns {text before the separator, text after the separator}. If
// `split_char` does not occur, the first element is the whole string and the
// second element is std::nullopt.
std::pair<std::string_view, std::optional<std::string_view>>
split_once(const std::string_view string, const char split_char)
{
  using Result = std::pair<std::string_view, std::optional<std::string_view>>;

  const auto sep_pos = string.find(split_char);
  if (sep_pos != std::string_view::npos) {
    return Result(string.substr(0, sep_pos), string.substr(sep_pos + 1));
  }
  return Result(string, std::nullopt);
}
431
// Return a copy of `string` with leading and trailing whitespace (as
// classified by std::isspace) removed. A string consisting only of whitespace
// yields an empty string.
std::string
strip_whitespace(const std::string_view string)
{
  // std::isspace requires a value representable as unsigned char; taking the
  // character as unsigned char avoids passing a negative value (undefined
  // behavior) for bytes >= 0x80 on platforms where char is signed.
  const auto is_space = [](const unsigned char ch) {
    return std::isspace(ch) != 0;
  };
  const auto start = std::find_if_not(string.begin(), string.end(), is_space);
  // base() of the reverse iterator points one past the last non-space
  // character.
  const auto end =
    std::find_if_not(string.rbegin(), string.rend(), is_space).base();
  return start < end ? std::string(start, end) : std::string();
}
441
// Return a copy of `string` with every character converted by std::tolower
// (which depends on the current C locale).
std::string
to_lowercase(std::string_view string)
{
  std::string result(string.length(), '\0');
  // std::tolower requires a value representable as unsigned char; taking the
  // character as unsigned char avoids passing a negative value (undefined
  // behavior) for bytes >= 0x80 on platforms where char is signed.
  std::transform(
    string.begin(), string.end(), result.begin(), [](const unsigned char ch) {
      return static_cast<char>(std::tolower(ch));
    });
  return result;
}
450
451 } // namespace util