Returns the size of the data when truncated to be less than or equal to
a specified size, making sure UTF-8 character boundaries are respected.
return uni_utf8_find_invalid_pos(data, size, &i) == 0;
}
+
/* Returns a size that is <= max_new_size and, for valid UTF-8 input, does
   not end in the middle of a multi-byte character. If max_new_size is
   already >= old_size, nothing is truncated and old_size is returned.

   data must contain at least old_size bytes. Only the bytes around the new
   boundary are inspected; the rest of the data is not validated. */
size_t uni_utf8_data_truncate(const unsigned char *data, size_t old_size,
			      size_t max_new_size)
{
	if (max_new_size >= old_size)
		return old_size;
	if (max_new_size == 0)
		return 0;

	/* data[max_new_size] is the first byte that would be cut off (it is
	   in bounds because max_new_size < old_size). If it starts a new
	   character - i.e. it is anything other than a 10xxxxxx continuation
	   byte - the cut is already on a character boundary. Testing only the
	   high bit here would wrongly drop the preceding complete character
	   whenever the next character is multi-byte. */
	if ((data[max_new_size] & 0xc0) != 0x80)
		return max_new_size;

	/* The cut falls inside a multi-byte character: drop the continuation
	   bytes that precede the cut ... */
	while (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0x80)
		max_new_size--;
	/* ... and then the lead byte (11xxxxxx) that started the character. */
	if (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0xc0)
		max_new_size--;
	return max_new_size;
}
bool uni_utf8_str_is_valid(const char *str);
/* Returns TRUE if data contains only valid UTF-8 input. */
bool uni_utf8_data_is_valid(const unsigned char *data, size_t size);
+/* Returns a size that is less than or equal to max_new_size and that, for
+   valid UTF-8 input, does not end in the middle of a multi-byte character.
+   If max_new_size >= old_size, old_size is returned unchanged. Only the bytes
+   around the new boundary are examined; the rest of the data is not checked. */
+size_t uni_utf8_data_truncate(const unsigned char *data, size_t old_size,
+ size_t max_new_size);
/* surrogate handling */
static inline unichar_t uni_join_surrogate(unichar_t high, unichar_t low)