return true;
}
-char *utf8_is_valid(const char *str) {
- const char *p;
+char *utf8_is_valid_n(const char *str, size_t len_bytes) {
+ /* Check if the string is composed of valid UTF-8 characters. If len_bytes is not (size_t) -1, examine
+ * at most len_bytes bytes and treat a NUL byte among them as invalid. Otherwise, stop at the first NUL. */
assert(str);
- p = str;
- while (*p) {
+ for (const char *p = str; len_bytes != (size_t) -1 ? (size_t) (p - str) < len_bytes : *p != '\0'; ) {
int len;
- len = utf8_encoded_valid_unichar(p, (size_t) -1);
- if (len < 0)
- return NULL;
+ if (_unlikely_(*p == '\0') && len_bytes != (size_t) -1)
+ return NULL; /* embedded NUL */
+
+ len = utf8_encoded_valid_unichar(p,
+ len_bytes != (size_t) -1 ? len_bytes - (p - str) : (size_t) -1);
+ if (_unlikely_(len < 0))
+ return NULL; /* invalid character */
p += len;
}
bool unichar_is_valid(char32_t c);
-char *utf8_is_valid(const char *s) _pure_;
+char *utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
+/* Validates the NUL-terminated string s as UTF-8. Forwards to utf8_is_valid_n() with (size_t) -1 as the
+ * length, the value that function treats as "no length given, stop at NUL", and returns its result
+ * unchanged. */
+static inline char *utf8_is_valid(const char *s) {
+ return utf8_is_valid_n(s, (size_t) -1);
+}
char *ascii_is_valid(const char *s) _pure_;
char *ascii_is_valid_n(const char *str, size_t len);
assert_se(utf8_is_printable("\t", 1));
}
+/* Exercises utf8_is_valid_n() with explicit byte lengths that fall short of, match, and exceed the
+ * character data of each literal. Assertions with a leading '!' expect a NULL return. */
+static void test_utf8_n_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ /* 22 ASCII characters: lengths up to 22 are accepted; 23 reaches the terminating NUL, which counts
+ * as an embedded NUL and is rejected. */
+ assert_se( utf8_is_valid_n("ascii is valid unicode", 21));
+ assert_se( utf8_is_valid_n("ascii is valid unicode", 22));
+ assert_se(!utf8_is_valid_n("ascii is valid unicode", 23));
+ /* One three-byte sequence (octal 342 204 242): accepted at length 0 (nothing examined) and at
+ * length 3 (the whole sequence). Lengths 1 and 2 cut the sequence short; 4 includes the NUL. */
+ assert_se( utf8_is_valid_n("\342\204\242", 0));
+ assert_se(!utf8_is_valid_n("\342\204\242", 1));
+ assert_se(!utf8_is_valid_n("\342\204\242", 2));
+ assert_se( utf8_is_valid_n("\342\204\242", 3));
+ assert_se(!utf8_is_valid_n("\342\204\242", 4));
+ /* Four ASCII bytes: every prefix is accepted; 5 includes the terminating NUL. */
+ assert_se( utf8_is_valid_n("<ZZ>", 0));
+ assert_se( utf8_is_valid_n("<ZZ>", 1));
+ assert_se( utf8_is_valid_n("<ZZ>", 2));
+ assert_se( utf8_is_valid_n("<ZZ>", 3));
+ assert_se( utf8_is_valid_n("<ZZ>", 4));
+ assert_se(!utf8_is_valid_n("<ZZ>", 5));
+}
+
static void test_utf8_is_valid(void) {
log_info("/* %s */", __func__);
}
int main(int argc, char *argv[]) {
+ test_utf8_n_is_valid();
test_utf8_is_valid();
test_utf8_is_printable();
test_ascii_is_valid();