1 From a96c0528c68093d155b674269a9c8bf48315fc01 Mon Sep 17 00:00:00 2001
2 From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
3 Date: Tue, 24 Nov 2015 13:47:16 +1300
4 Subject: [PATCH 1/3] CVE-2015-5330: Fix handling of unicode near string
7 Until now next_codepoint_ext() and next_codepoint_handle_ext() were
8 using strnlen(str, 5) to determine how much string they should try to
9 decode. This ended up looking past the end of the string when it was not
10 null terminated and the final character looked like a multi-byte encoding.
11 The fix is to let the caller say how long the string can be.
13 Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
15 Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
16 Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
17 Reviewed-by: Ralph Boehme <slow@samba.org>
19 lib/util/charset/charset.h | 9 +++++----
20 lib/util/charset/codepoints.c | 19 +++++++++++++------
21 lib/util/charset/util_unistr.c | 5 ++++-
22 source3/lib/util_str.c | 2 +-
23 4 files changed, 23 insertions(+), 12 deletions(-)
25 diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
26 index 474d77e..b70aa61 100644
27 --- a/lib/util/charset/charset.h
28 +++ b/lib/util/charset/charset.h
29 @@ -175,15 +175,16 @@ smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
30 charset_t from, charset_t to);
31 const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch);
33 -codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
35 +codepoint_t next_codepoint_ext(const char *str, size_t len,
36 + charset_t src_charset, size_t *size);
37 codepoint_t next_codepoint(const char *str, size_t *size);
38 ssize_t push_codepoint(char *str, codepoint_t c);
41 codepoint_t next_codepoint_convenience_ext(struct smb_iconv_convenience *ic,
42 - const char *str, charset_t src_charset,
44 + const char *str, size_t len,
45 + charset_t src_charset,
47 codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
48 const char *str, size_t *size);
49 ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,
50 diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
51 index 5ee95a8..8dd647e 100644
52 --- a/lib/util/charset/codepoints.c
53 +++ b/lib/util/charset/codepoints.c
54 @@ -346,7 +346,8 @@ smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
56 _PUBLIC_ codepoint_t next_codepoint_convenience_ext(
57 struct smb_iconv_convenience *ic,
58 - const char *str, charset_t src_charset,
59 + const char *str, size_t len,
60 + charset_t src_charset,
61 size_t *bytes_consumed)
63 /* it cannot occupy more than 4 bytes in UTF16 format */
64 @@ -366,7 +367,7 @@ _PUBLIC_ codepoint_t next_codepoint_convenience_ext(
65 * we assume that no multi-byte character can take more than 5 bytes.
66 * This is OK as we only support codepoints up to 1M (U+100000)
68 - ilen_orig = strnlen(str, 5);
69 + ilen_orig = MIN(len, 5);
72 descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
73 @@ -424,7 +425,13 @@ _PUBLIC_ codepoint_t next_codepoint_convenience_ext(
74 _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
75 const char *str, size_t *size)
77 - return next_codepoint_convenience_ext(ic, str, CH_UNIX, size);
79 + * We assume that no multi-byte character can take more than 5 bytes
80 + * thus avoiding walking all the way down a long string. This is OK as
81 + * Unicode codepoints only go up to (U+10ffff), which can always be
82 + * encoded in 4 bytes or less.
84 + return next_codepoint_convenience_ext(ic, str, strnlen(str, 5), CH_UNIX, size);
88 @@ -486,10 +493,10 @@ _PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,
92 -_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
94 +_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, size_t len,
95 + charset_t src_charset, size_t *size)
97 - return next_codepoint_convenience_ext(get_iconv_convenience(), str,
98 + return next_codepoint_convenience_ext(get_iconv_convenience(), str, len,
102 diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
103 index 760be77..d9e9b34 100644
104 --- a/lib/util/charset/util_unistr.c
105 +++ b/lib/util/charset/util_unistr.c
106 @@ -485,7 +485,10 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
108 while (n-- && *src) {
110 - codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
111 + codepoint_t c = next_codepoint_convenience_ext(iconv_convenience,
118 diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
119 index 4701528..f8a5160 100644
120 --- a/source3/lib/util_str.c
121 +++ b/source3/lib/util_str.c
122 @@ -1486,7 +1486,7 @@ size_t strlen_m_ext(const char *s, const charset_t src_charset,
126 - codepoint_t c = next_codepoint_ext(s, src_charset, &c_size);
127 + codepoint_t c = next_codepoint_ext(s, strnlen(s, 5), src_charset, &c_size);
130 switch (dst_charset) {
135 From 8298252a1ba9c014f7ceb76736abb38132181f79 Mon Sep 17 00:00:00 2001
136 From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
137 Date: Tue, 24 Nov 2015 13:54:09 +1300
138 Subject: [PATCH 2/3] CVE-2015-5330: next_codepoint_handle_ext: don't
139 short-circuit UTF16 low bytes
141 UTF16 contains zero bytes when it is encoding ASCII (for example), so we
142 can't assume the absense of the 0x80 bit means a one byte encoding. No
143 current callers use UTF16.
145 Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
147 Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
148 Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
149 Reviewed-by: Ralph Boehme <slow@samba.org>
151 lib/util/charset/codepoints.c | 5 ++++-
152 1 file changed, 4 insertions(+), 1 deletion(-)
154 diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
155 index 8dd647e..cf5f3e6 100644
156 --- a/lib/util/charset/codepoints.c
157 +++ b/lib/util/charset/codepoints.c
158 @@ -358,7 +358,10 @@ _PUBLIC_ codepoint_t next_codepoint_convenience_ext(
162 - if ((str[0] & 0x80) == 0) {
164 + if (((str[0] & 0x80) == 0) && (src_charset == CH_DOS ||
165 + src_charset == CH_UNIX ||
166 + src_charset == CH_UTF8)) {
168 return (codepoint_t)str[0];
174 From 0988b7cb606a7e4cd73fd8db02806abbc9d8f2e0 Mon Sep 17 00:00:00 2001
175 From: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
176 Date: Tue, 24 Nov 2015 13:49:09 +1300
177 Subject: [PATCH 3/3] CVE-2015-5330: strupper_talloc_n_handle(): properly count
180 When a codepoint eats more than one byte we really want to know,
181 especially if the string is not NUL terminated.
183 Bug: https://bugzilla.samba.org/show_bug.cgi?id=11599
185 Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
186 Pair-programmed-with: Andrew Bartlett <abartlet@samba.org>
187 Reviewed-by: Ralph Boehme <slow@samba.org>
189 lib/util/charset/util_unistr.c | 3 ++-
190 1 file changed, 2 insertions(+), 1 deletion(-)
192 diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
193 index d9e9b34..6dad43f 100644
194 --- a/lib/util/charset/util_unistr.c
195 +++ b/lib/util/charset/util_unistr.c
196 @@ -483,13 +483,14 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
200 - while (n-- && *src) {
201 + while (n && *src) {
203 codepoint_t c = next_codepoint_convenience_ext(iconv_convenience,