]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/utf8.h
tree-wide: "<n>bit" → "<n>-bit"
[thirdparty/systemd.git] / src / basic / utf8.h
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
c2f1db8f 2#pragma once
7f110ff9 3
31f7bf19 4#include <stdbool.h>
11c3a366
TA
5#include <stddef.h>
6#include <stdint.h>
c932fb71 7#include <uchar.h>
31f7bf19 8
7f110ff9 9#include "macro.h"
38e0c63d 10#include "missing_type.h"
7f110ff9 11
/* The three bytes EF BF BD, i.e. the UTF-8 encoding of U+FFFD (REPLACEMENT CHARACTER), as a
 * NUL-terminated string literal. */
#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
/* The three bytes EF BB BF, i.e. the UTF-8 encoding of U+FEFF (byte order mark), as a
 * NUL-terminated string literal. */
#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
550a40ec 14
/* Predicate on a single 32-bit code point.
 * NOTE(review): presumably reports whether c is a valid Unicode code point; the exact criteria
 * are defined by the implementation, which is not part of this header — confirm there. */
bool unichar_is_valid(char32_t c);
f3ee6297 16
80ab31a4
ZJS
17char *utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
/* Convenience wrapper around utf8_is_valid_n(): forwards s with SIZE_MAX as the byte length and
 * returns that function's result unchanged. */
static inline char *utf8_is_valid(const char *s) {
        return utf8_is_valid_n(s, SIZE_MAX);
}
7f110ff9 21char *ascii_is_valid(const char *s) _pure_;
294a3121 22char *ascii_is_valid_n(const char *str, size_t len);
7f110ff9 23
9b49a3b4
ZJS
/* Takes a string, a replacement character and an output pointer (ret); reports status as int.
 * NOTE(review): presumably stores a newly allocated ASCII rendition of str in *ret, with
 * replacement_char substituted for non-ASCII characters, and returns a negative value on
 * failure — confirm against the implementation, including who frees *ret. */
int utf8_to_ascii(const char *str, char replacement_char, char **ret);
25
618727da 26bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newline) _pure_;
6ed62be0
LP
27#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
28
/* Escaping helpers that return a char pointer.
 * NOTE(review): presumably both return a newly allocated string the caller must free, and NULL
 * on allocation failure; console_width and force_ellipsis presumably control truncation of the
 * output — confirm against the implementation. */
char *utf8_escape_invalid(const char *s);
char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis);
/* Convenience wrapper around utf8_escape_non_printable_full(): forwards str with console_width
 * set to SIZE_MAX and force_ellipsis set to false, and returns that function's result
 * unchanged. */
static inline char *utf8_escape_non_printable(const char *str) {
        return utf8_escape_non_printable_full(str, SIZE_MAX, false);
}
ba961854 34
/* Encoders for a single 32-bit code point into a caller-supplied output buffer.
 * NOTE(review): presumably the return value is the number of output units written (bytes for
 * UTF-8, 16-bit words for UTF-16); the required buffer size is not visible here — confirm
 * against the implementation. */
size_t utf8_encode_unichar(char *out_utf8, char32_t g);
size_t utf16_encode_unichar(char16_t *out, char32_t c);
37
/* Conversions between UTF-16 and UTF-8 strings. Note that utf16_to_utf8() takes its length in
 * bytes, not in 16-bit words.
 * NOTE(review): presumably both return newly allocated strings the caller must free, and NULL
 * on failure; the unit of utf8_to_utf16()'s length is not stated here — confirm against the
 * implementation. */
char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */);
char16_t *utf8_to_utf16(const char *s, size_t length);
40
size_t char16_strlen(const char16_t *s); /* returns the number of 16-bit words in the string (not bytes!) */
02a36bc9 42
/* Helpers operating on one encoded character at the start of str.
 * NOTE(review): presumably utf8_encoded_valid_unichar() returns the encoded length in bytes or
 * a negative value if invalid, and utf8_encoded_to_unichar() stores the decoded code point in
 * *ret_unichar — confirm against the implementation. */
int utf8_encoded_valid_unichar(const char *str, size_t length);
int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
04166cb7 45
/* Returns true if the UTF-16 code unit c lies in the range 0xD800..0xDFFF (inclusive), i.e. it
 * is either half of a surrogate pair. */
static inline bool utf16_is_surrogate(char16_t c) {
        return c >= 0xd800U && c <= 0xdfffU;
}
49
/* Returns true if the UTF-16 code unit c lies in the range 0xDC00..0xDFFF (inclusive), i.e. it
 * is the second (trailing/low) half of a surrogate pair. */
static inline bool utf16_is_trailing_surrogate(char16_t c) {
        return c >= 0xdc00U && c <= 0xdfffU;
}
53
/* Combines a leading and a trailing UTF-16 surrogate into the code point they encode:
 * the offset of lead from 0xD800 supplies the upper 10 bits, the offset of trail from 0xDC00
 * the lower 10 bits, and 0x10000 is added to the result.
 *
 * No range checking is done here; both operands are widened to char32_t before subtracting, so
 * the arithmetic is unsigned and out-of-range inputs wrap instead of being rejected. */
static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail) {
        return (((char32_t) lead - 0xd800U) << 10) + ((char32_t) trail - 0xdc00U) + 0x10000U;
}
65ee8660
LP
57
/* Measurements of a string, returned as size_t.
 * NOTE(review): presumably utf8_n_codepoints() counts code points and utf8_console_width()
 * computes the number of terminal columns needed; how invalid input is reported is not visible
 * here — confirm against the implementation. */
size_t utf8_n_codepoints(const char *str);
size_t utf8_console_width(const char *str);