]>
Commit | Line | Data |
---|---|---|
4e155ec4 AP |
1 | /* |
2 | * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. | |
3 | * | |
dffa7520 | 4 | * Licensed under the Apache License 2.0 (the "License"). You may not use |
4e155ec4 AP |
5 | * this file except in compliance with the License. You can obtain a copy |
6 | * in the file LICENSE in the source distribution or at | |
7 | * https://www.openssl.org/source/license.html | |
8 | */ | |
9 | ||
10 | #include <windows.h> | |
11 | #include <stdlib.h> | |
12 | #include <string.h> | |
13 | #include <malloc.h> | |
14 | ||
15 | #if defined(CP_UTF8) | |
16 | ||
17 | static UINT saved_cp; | |
18 | static int newargc; | |
19 | static char **newargv; | |
20 | ||
/*
 * Process-exit hook (registered through atexit() by win32_utf8argv()).
 * Puts the console output code page back to the value remembered in
 * |saved_cp| and releases every string in |newargv| along with the
 * vector itself.
 */
static void cleanup(void)
{
    int idx = 0;

    SetConsoleOutputCP(saved_cp);

    while (idx < newargc) {
        free(newargv[idx]);
        idx++;
    }

    free(newargv);
}
32 | ||
33 | /* | |
34 | * Incrementally [re]allocate newargv and keep it NULL-terminated. | |
35 | */ | |
36 | static int validate_argv(int argc) | |
37 | { | |
38 | static int size = 0; | |
39 | ||
40 | if (argc >= size) { | |
41 | char **ptr; | |
42 | ||
43 | while (argc >= size) | |
44 | size += 64; | |
45 | ||
46 | ptr = realloc(newargv, size * sizeof(newargv[0])); | |
47 | if (ptr == NULL) | |
48 | return 0; | |
49 | ||
50 | (newargv = ptr)[argc] = NULL; | |
51 | } else { | |
52 | newargv[argc] = NULL; | |
53 | } | |
54 | ||
55 | return 1; | |
56 | } | |
57 | ||
/*
 * Expand one wildcard argument into newargv.
 *
 * |wstr| points at the |wlen| wide characters of a parsed argument. The
 * element at wstr[wlen] is temporarily overwritten with a terminator and
 * then restored, so it must be writable.
 *
 * Returns 0, appending nothing, when the file name part holds no '*' or
 * '?', or when FindFirstFileW() reports no match. Returns 1 once
 * enumeration has started: every match other than "." and ".." is
 * appended to newargv as UTF-8, prefixed with the directory part of
 * |wstr|. Enumeration is best-effort; it stops early (still returning 1)
 * if memory cannot be obtained, and skips names that cannot be converted.
 */
static int process_glob(WCHAR *wstr, int wlen)
{
    int i, slash, udlen;
    WCHAR saved_char;
    WIN32_FIND_DATAW data;
    HANDLE h;

    /*
     * Note that we support wildcard characters only in filename part
     * of the path, and not in directories. Windows users are used to
     * this, that's why recursive glob processing is not implemented.
     */
    /*
     * Start by looking for last slash or backslash, ...
     */
    for (slash = 0, i = 0; i < wlen; i++)
        if (wstr[i] == L'/' || wstr[i] == L'\\')
            slash = i + 1;
    /*
     * ... then look for asterisk or question mark in the file name.
     */
    for (i = slash; i < wlen; i++)
        if (wstr[i] == L'*' || wstr[i] == L'?')
            break;

    if (i == wlen)
        return 0;   /* definitely not a glob */

    saved_char = wstr[wlen];
    wstr[wlen] = L'\0';
    h = FindFirstFileW(wstr, &data);
    wstr[wlen] = saved_char;
    if (h == INVALID_HANDLE_VALUE)
        return 0;   /* not a valid glob, just pass... */

    /* UTF-8 length of the directory prefix (up to and including slash). */
    if (slash)
        udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                    NULL, 0, NULL, NULL);
    else
        udlen = 0;

    do {
        int uflen;
        char *arg;

        /*
         * skip over . and ..
         */
        if (data.cFileName[0] == L'.') {
            if ((data.cFileName[1] == L'\0') ||
                (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
                continue;
        }

        /*
         * -1 below means "scan for trailing '\0' *and* count it",
         * so that |uflen| covers even trailing '\0'.
         */
        uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                                    NULL, 0, NULL, NULL);
        /*
         * A zero length means the conversion failed; going on would
         * yield a buffer without a terminating '\0'.
         */
        if (uflen <= 0)
            continue;

        if (!validate_argv(newargc + 1))
            break;

        arg = malloc(udlen + uflen);
        if (arg == NULL)
            break;

        if (udlen)
            WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                arg, udlen, NULL, NULL);

        WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                            arg + udlen, uflen, NULL, NULL);

        newargv[newargc++] = arg;
    } while (FindNextFileW(h, &data));

    /* Search handles must be released with FindClose(), not CloseHandle(). */
    FindClose(h);

    return 1;
}
140 | ||
/*
 * Rebuild the argument vector from the wide-character command line,
 * converted to UTF-8 and with file name wildcards expanded.
 *
 * Does nothing when GetEnvironmentVariableW() reports 0 for
 * OPENSSL_WIN32_UTF8, or when the command line is unavailable.
 *
 * On success |*argc| and |*argv| are replaced with the new vector, the
 * console output code page is switched to UTF-8, and cleanup() is
 * registered with atexit(). If memory runs out, everything built so far
 * is freed and |*argc| / |*argv| are left untouched.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    const WCHAR *wcmdline;
    WCHAR *warg, *wend, *p;
    int wlen, ulen, valid = 1;
    char *arg;

    if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0)
        return;

    /*
     * Fetch the command line before allocating anything, so that bailing
     * out here cannot leak the argument vector.
     */
    wcmdline = GetCommandLineW();
    if (wcmdline == NULL)
        return;

    newargc = 0;
    newargv = NULL;
    if (!validate_argv(newargc))
        return;

    /*
     * make a copy of the command line, since we might have to modify it...
     */
    wlen = (int)wcslen(wcmdline);
    p = _alloca((wlen + 1) * sizeof(WCHAR));
    wcscpy(p, wcmdline);

    while (*p != L'\0') {
        int in_quote = 0;

        if (*p == L' ' || *p == L'\t') {
            p++; /* skip over white spaces */
            continue;
        }

        /*
         * Note: because we may need to fiddle with the number of backslashes,
         * the argument string is copied into itself.  This is safe because
         * the number of characters will never expand.
         */
        warg = wend = p;
        while (*p != L'\0'
               && (in_quote || (*p != L' ' && *p != L'\t'))) {
            switch (*p) {
            case L'\\':
                /*
                 * Microsoft documentation on how backslashes are treated
                 * is:
                 *
                 * + Backslashes are interpreted literally, unless they
                 *   immediately precede a double quotation mark.
                 * + If an even number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is interpreted as a string delimiter.
                 * + If an odd number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is "escaped" by the remaining backslash, causing a
                 *   literal double quotation mark (") to be placed in argv.
                 *
                 * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
                 *
                 * Though referred page doesn't mention it, multiple double
                 * quotes are also special. Pair of double quotes in quoted
                 * string is counted as single double quote.
                 */
                {
                    const WCHAR *q = p;
                    int i;

                    while (*p == L'\\')
                        p++;

                    if (*p == L'"') {
                        for (i = (int)((p - q) / 2); i > 0; i--)
                            *wend++ = L'\\';

                        /*
                         * if odd amount of backslashes before the quote,
                         * said quote is part of the argument, not a delimiter
                         */
                        if ((p - q) % 2 == 1)
                            *wend++ = *p++;
                    } else {
                        for (i = (int)(p - q); i > 0; i--)
                            *wend++ = L'\\';
                    }
                }
                break;
            case L'"':
                /*
                 * Without the preceding backslash (or when preceded with an
                 * even number of backslashes), the double quote is a simple
                 * string delimiter and just slightly change the parsing state
                 */
                if (in_quote && p[1] == L'"')
                    *wend++ = *p++;
                else
                    in_quote = !in_quote;
                p++;
                break;
            default:
                /*
                 * Any other non-delimiter character is just taken verbatim
                 */
                *wend++ = *p++;
            }
        }

        wlen = (int)(wend - warg);

        /*
         * Empty arguments and anything process_glob() declines are
         * converted and appended as a single entry.
         */
        if (wlen == 0 || !process_glob(warg, wlen)) {
            if (!validate_argv(newargc + 1)) {
                valid = 0;
                break;
            }

            ulen = 0;
            if (wlen > 0) {
                ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                           NULL, 0, NULL, NULL);
                if (ulen <= 0)
                    continue;   /* unconvertible argument is dropped */
            }

            arg = malloc(ulen + 1);
            if (arg == NULL) {
                valid = 0;
                break;
            }

            if (wlen > 0)
                WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                    arg, ulen, NULL, NULL);
            arg[ulen] = '\0';

            newargv[newargc++] = arg;
        }
    }

    if (valid) {
        saved_cp = GetConsoleOutputCP();
        SetConsoleOutputCP(CP_UTF8);

        *argc = newargc;
        *argv = newargv;

        atexit(cleanup);
    } else if (newargv != NULL) {
        int i;

        /* Out of memory part-way through: discard the partial vector. */
        for (i = 0; i < newargc; i++)
            free(newargv[i]);

        free(newargv);

        newargc = 0;
        newargv = NULL;
    }

    return;
}
304 | #else | |
/*
 * Fallback used when CP_UTF8 is not defined: the caller's argument
 * vector is left exactly as it was.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
307 | #endif |