lib: unichar: Add uni_utf8_data_truncate().

author Stephan Bosch <stephan.bosch@dovecot.fi>

Thu, 31 May 2018 22:02:16 +0000 (00:02 +0200)

committer Stephan Bosch <stephan.bosch@dovecot.fi>

Thu, 28 Jun 2018 19:31:18 +0000 (21:31 +0200)
author Stephan Bosch <stephan.bosch@dovecot.fi>
Thu, 31 May 2018 22:02:16 +0000 (00:02 +0200)
committer Stephan Bosch <stephan.bosch@dovecot.fi>
Thu, 28 Jun 2018 19:31:18 +0000 (21:31 +0200)
diff --git a/src/lib/unichar.c b/src/lib/unichar.c

index 0b6756afd685ee58bd1d29d6d4ce653c9e0b7491..29489a20142405b94c72de545f4b09bf612691be 100644 (file)
--- a/src/lib/unichar.c
+++ b/src/lib/unichar.c
@@ -428,3 +428,20 @@ bool uni_utf8_data_is_valid(const unsigned char *data, size_t size)
  
         return uni_utf8_find_invalid_pos(data, size, &i) == 0;
  }
+
+/* Returns the size at which data[0..old_size) can be cut so that the result
+   is at most max_new_size bytes and no UTF-8 sequence is split in two. For
+   well-formed input this is the largest such size. Only the bytes around the
+   cut are examined; the data is not otherwise validated. When old_size
+   already fits within max_new_size, old_size is returned unchanged. */
+size_t uni_utf8_data_truncate(const unsigned char *data, size_t old_size,
+                             size_t max_new_size)
+{
+       if (max_new_size >= old_size)
+               return old_size;
+       if (max_new_size == 0)
+               return 0;
+
+       /* max_new_size < old_size here, so data[max_new_size] is in bounds.
+          If the first dropped byte is not a continuation byte (10xxxxxx),
+          i.e. it is ASCII or the lead byte of the next sequence, the cut
+          already sits on a character boundary and nothing more is removed. */
+       if ((data[max_new_size] & 0xc0) != 0x80)
+               return max_new_size;
+       /* The cut falls inside a multi-byte sequence: drop the continuation
+          bytes of that sequence that precede the cut... */
+       while (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0x80)
+               max_new_size--;
+       /* ...and then its lead byte (11xxxxxx). */
+       if (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0xc0)
+               max_new_size--;
+       return max_new_size;
+}
diff --git a/src/lib/unichar.h b/src/lib/unichar.h

index bdeaeffa0de60777dfe383945f9b4ed8207285cc..08df8e56f315fe4c7f3847c825fffbd47d07be0b 100644 (file)
--- a/src/lib/unichar.h
+++ b/src/lib/unichar.h
@@ -114,6 +114,11 @@ bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
  bool uni_utf8_str_is_valid(const char *str);
  /* Returns TRUE if data contains only valid UTF-8 input. */
  bool uni_utf8_data_is_valid(const unsigned char *data, size_t size);
+/* Returns the size of the data when truncated to be less than or equal to
+   max_new_size, making sure UTF-8 character boundaries are respected. If
+   old_size already fits within max_new_size, old_size is returned unchanged.
+   This only looks at the last character at the new boundary; the rest of the
+   data is not checked for validity. */
+size_t uni_utf8_data_truncate(const unsigned char *data, size_t old_size,
+                             size_t max_new_size);
  
  /* surrogate handling */
  static inline unichar_t uni_join_surrogate(unichar_t high, unichar_t low)
author	Stephan Bosch <stephan.bosch@dovecot.fi>
	Thu, 31 May 2018 22:02:16 +0000 (00:02 +0200)
committer	Stephan Bosch <stephan.bosch@dovecot.fi>
	Thu, 28 Jun 2018 19:31:18 +0000 (21:31 +0200)
src/lib/unichar.c		patch \| blob \| blame \| history
src/lib/unichar.h		patch \| blob \| blame \| history