]> git.ipfire.org Git - thirdparty/glibc.git/blob - stdlib/canonicalize.c
stdlib: Avoid undefined behavior in stdlib/tst-labs
[thirdparty/glibc.git] / stdlib / canonicalize.c
1 /* Return the canonical absolute name of a given file.
2 Copyright (C) 1996-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #ifndef _LIBC
20 /* Don't use __attribute__ __nonnull__ in this compilation unit. Otherwise gcc
21 optimizes away the name == NULL test below. */
22 # define _GL_ARG_NONNULL(params)
23
24 # define _GL_USE_STDLIB_ALLOC 1
25 # include <libc-config.h>
26 #endif
27
28 /* Specification. */
29 #include <stdlib.h>
30
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <string.h>
36 #include <sys/stat.h>
37 #include <unistd.h>
38
39 #include <eloop-threshold.h>
40 #include <filename.h>
41 #include <idx.h>
42 #include <intprops.h>
43 #include <scratch_buffer.h>
44
45 #ifdef _LIBC
46 # include <shlib-compat.h>
47 # define GCC_LINT 1
48 # define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
49 #else
50 # define __canonicalize_file_name canonicalize_file_name
51 # define __realpath realpath
52 # define __strdup strdup
53 # include "pathmax.h"
54 # define __faccessat faccessat
55 # if defined _WIN32 && !defined __CYGWIN__
56 # define __getcwd _getcwd
57 # elif HAVE_GETCWD
58 # if IN_RELOCWRAPPER
59 /* When building the relocatable program wrapper, use the system's getcwd
60 function, not the gnulib override, otherwise we would get a link error.
61 */
62 # undef getcwd
63 # endif
64 # if defined VMS && !defined getcwd
65 /* We want the directory in Unix syntax, not in VMS syntax.
66 The gnulib override of 'getcwd' takes 2 arguments; the original VMS
67 'getcwd' takes 3 arguments. */
68 # define __getcwd(buf, max) getcwd (buf, max, 0)
69 # else
70 # define __getcwd getcwd
71 # endif
72 # else
73 # define __getcwd(buf, max) getwd (buf)
74 # endif
75 # define __mempcpy mempcpy
76 # define __pathconf pathconf
77 # define __readlink readlink
78 # define __stat stat
79 #endif
80
/* IF_LINT (Code) expands to Code only when GCC_LINT or lint is defined,
   and to nothing otherwise.  It is used to add initializers whose sole
   purpose is to silence bogus GCC -Wmaybe-uninitialized warnings.  */
#if !defined GCC_LINT && !defined lint
# define IF_LINT(Code) /* empty */
#else
# define IF_LINT(Code) Code
#endif
87
/* Unless the configuration has already said otherwise, a leading "//"
   is not treated as a root distinct from "/".  */
#if !defined DOUBLE_SLASH_IS_DISTINCT_ROOT
# define DOUBLE_SLASH_IS_DISTINCT_ROOT false
#endif
91
92 #if defined _LIBC || !FUNC_REALPATH_WORKS
93
/* Tell whether FILE can be shown to exist, following symbolic links.
   Return true if it can; otherwise return false, with errno left as
   set by the failing call.  */
static bool
file_accessible (char const *file)
{
# if !(defined _LIBC || HAVE_FACCESSAT)
  /* Without faccessat, probe with stat.  A stat that fails with
     EOVERFLOW is still accepted as evidence that FILE exists.  */
  struct stat st;
  if (__stat (file, &st) == 0)
    return true;
  return errno == EOVERFLOW;
# else
  int rc = __faccessat (AT_FDCWD, file, F_OK, AT_EACCESS);
  return rc == 0;
# endif
}
106
107 /* True if concatenating END as a suffix to a file name means that the
108 code needs to check that the file name is that of a searchable
109 directory, since the canonicalize_filename_mode_stk code won't
110 check this later anyway when it checks an ordinary file name
111 component within END. END must either be empty, or start with a
112 slash. */
113
114 static bool _GL_ATTRIBUTE_PURE
115 suffix_requires_dir_check (char const *end)
116 {
117 /* If END does not start with a slash, the suffix is OK. */
118 while (ISSLASH (*end))
119 {
120 /* Two or more slashes act like a single slash. */
121 do
122 end++;
123 while (ISSLASH (*end));
124
125 switch (*end++)
126 {
127 default: return false; /* An ordinary file name component is OK. */
128 case '\0': return true; /* Trailing "/" is trouble. */
129 case '.': break; /* Possibly "." or "..". */
130 }
131 /* Trailing "/.", or "/.." even if not trailing, is trouble. */
132 if (!*end || (*end == '.' && (!end[1] || ISSLASH (end[1]))))
133 return true;
134 }
135
136 return false;
137 }
138
/* Suffix appended to a file name in order to test whether the name is
   a searchable directory.  "/" suffices on POSIX platforms, but "/./"
   is sometimes needed on macOS 10.13 <https://bugs.gnu.org/30350>, and
   should also work on platforms like AIX 7.2 that need at least "/.".  */

static char const dir_suffix[] =
#if defined _LIBC || defined LSTAT_FOLLOWS_SLASHED_SYMLINK
  "/";
#else
  "/./";
#endif
149
150 /* Return true if DIR is a searchable dir, false (setting errno) otherwise.
151 DIREND points to the NUL byte at the end of the DIR string.
152 Store garbage into DIREND[0 .. strlen (dir_suffix)]. */
153
154 static bool
155 dir_check (char *dir, char *dirend)
156 {
157 strcpy (dirend, dir_suffix);
158 return file_accessible (dir);
159 }
160
161 static idx_t
162 get_path_max (void)
163 {
164 # ifdef PATH_MAX
165 long int path_max = PATH_MAX;
166 # else
167 /* The caller invoked realpath with a null RESOLVED, even though
168 PATH_MAX is not defined as a constant. The glibc manual says
169 programs should not do this, and POSIX says the behavior is undefined.
170 Historically, glibc here used the result of pathconf, or 1024 if that
171 failed; stay consistent with this (dubious) historical practice. */
172 int err = errno;
173 long int path_max = __pathconf ("/", _PC_PATH_MAX);
174 __set_errno (err);
175 # endif
176 return path_max < 0 ? 1024 : path_max <= IDX_MAX ? path_max : IDX_MAX;
177 }
178
179 /* Scratch buffers used by realpath_stk and managed by __realpath. */
180 struct realpath_bufs
181 {
182 struct scratch_buffer rname;
183 struct scratch_buffer extra;
184 struct scratch_buffer link;
185 };
186
187 static char *
188 realpath_stk (const char *name, char *resolved, struct realpath_bufs *bufs)
189 {
190 char *dest;
191 char const *start;
192 char const *end;
193 int num_links = 0;
194
195 if (name == NULL)
196 {
197 /* As per Single Unix Specification V2 we must return an error if
198 either parameter is a null pointer. We extend this to allow
199 the RESOLVED parameter to be NULL in case the we are expected to
200 allocate the room for the return value. */
201 __set_errno (EINVAL);
202 return NULL;
203 }
204
205 if (name[0] == '\0')
206 {
207 /* As per Single Unix Specification V2 we must return an error if
208 the name argument points to an empty string. */
209 __set_errno (ENOENT);
210 return NULL;
211 }
212
213 char *rname = bufs->rname.data;
214 bool end_in_extra_buffer = false;
215 bool failed = true;
216
217 /* This is always zero for Posix hosts, but can be 2 for MS-Windows
218 and MS-DOS X:/foo/bar file names. */
219 idx_t prefix_len = FILE_SYSTEM_PREFIX_LEN (name);
220
221 if (!IS_ABSOLUTE_FILE_NAME (name))
222 {
223 while (!__getcwd (bufs->rname.data, bufs->rname.length))
224 {
225 if (errno != ERANGE)
226 {
227 dest = rname;
228 goto error;
229 }
230 if (!scratch_buffer_grow (&bufs->rname))
231 return NULL;
232 rname = bufs->rname.data;
233 }
234 dest = strchr (rname, '\0');
235 start = name;
236 prefix_len = FILE_SYSTEM_PREFIX_LEN (rname);
237 }
238 else
239 {
240 dest = __mempcpy (rname, name, prefix_len);
241 *dest++ = '/';
242 if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
243 {
244 if (prefix_len == 0 /* implies ISSLASH (name[0]) */
245 && ISSLASH (name[1]) && !ISSLASH (name[2]))
246 *dest++ = '/';
247 *dest = '\0';
248 }
249 start = name + prefix_len;
250 }
251
252 for ( ; *start; start = end)
253 {
254 /* Skip sequence of multiple file name separators. */
255 while (ISSLASH (*start))
256 ++start;
257
258 /* Find end of component. */
259 for (end = start; *end && !ISSLASH (*end); ++end)
260 /* Nothing. */;
261
262 /* Length of this file name component; it can be zero if a file
263 name ends in '/'. */
264 idx_t startlen = end - start;
265
266 if (startlen == 0)
267 break;
268 else if (startlen == 1 && start[0] == '.')
269 /* nothing */;
270 else if (startlen == 2 && start[0] == '.' && start[1] == '.')
271 {
272 /* Back up to previous component, ignore if at root already. */
273 if (dest > rname + prefix_len + 1)
274 for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest)
275 continue;
276 if (DOUBLE_SLASH_IS_DISTINCT_ROOT
277 && dest == rname + 1 && !prefix_len
278 && ISSLASH (*dest) && !ISSLASH (dest[1]))
279 dest++;
280 }
281 else
282 {
283 if (!ISSLASH (dest[-1]))
284 *dest++ = '/';
285
286 while (rname + bufs->rname.length - dest
287 < startlen + sizeof dir_suffix)
288 {
289 idx_t dest_offset = dest - rname;
290 if (!scratch_buffer_grow_preserve (&bufs->rname))
291 return NULL;
292 rname = bufs->rname.data;
293 dest = rname + dest_offset;
294 }
295
296 dest = __mempcpy (dest, start, startlen);
297 *dest = '\0';
298
299 char *buf;
300 ssize_t n;
301 while (true)
302 {
303 buf = bufs->link.data;
304 idx_t bufsize = bufs->link.length;
305 n = __readlink (rname, buf, bufsize - 1);
306 if (n < bufsize - 1)
307 break;
308 if (!scratch_buffer_grow (&bufs->link))
309 return NULL;
310 }
311 if (0 <= n)
312 {
313 if (++num_links > __eloop_threshold ())
314 {
315 __set_errno (ELOOP);
316 goto error;
317 }
318
319 buf[n] = '\0';
320
321 char *extra_buf = bufs->extra.data;
322 idx_t end_idx IF_LINT (= 0);
323 if (end_in_extra_buffer)
324 end_idx = end - extra_buf;
325 size_t len = strlen (end);
326 if (INT_ADD_OVERFLOW (len, n))
327 {
328 __set_errno (ENOMEM);
329 return NULL;
330 }
331 while (bufs->extra.length <= len + n)
332 {
333 if (!scratch_buffer_grow_preserve (&bufs->extra))
334 return NULL;
335 extra_buf = bufs->extra.data;
336 }
337 if (end_in_extra_buffer)
338 end = extra_buf + end_idx;
339
340 /* Careful here, end may be a pointer into extra_buf... */
341 memmove (&extra_buf[n], end, len + 1);
342 name = end = memcpy (extra_buf, buf, n);
343 end_in_extra_buffer = true;
344
345 if (IS_ABSOLUTE_FILE_NAME (buf))
346 {
347 idx_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf);
348
349 dest = __mempcpy (rname, buf, pfxlen);
350 *dest++ = '/'; /* It's an absolute symlink */
351 if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
352 {
353 if (ISSLASH (buf[1]) && !ISSLASH (buf[2]) && !pfxlen)
354 *dest++ = '/';
355 *dest = '\0';
356 }
357 /* Install the new prefix to be in effect hereafter. */
358 prefix_len = pfxlen;
359 }
360 else
361 {
362 /* Back up to previous component, ignore if at root
363 already: */
364 if (dest > rname + prefix_len + 1)
365 for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest)
366 continue;
367 if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1
368 && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len)
369 dest++;
370 }
371 }
372 else if (! (suffix_requires_dir_check (end)
373 ? dir_check (rname, dest)
374 : errno == EINVAL))
375 goto error;
376 }
377 }
378 if (dest > rname + prefix_len + 1 && ISSLASH (dest[-1]))
379 --dest;
380 if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && !prefix_len
381 && ISSLASH (*dest) && !ISSLASH (dest[1]))
382 dest++;
383 failed = false;
384
385 error:
386 *dest++ = '\0';
387 if (resolved != NULL)
388 {
389 /* Copy the full result on success or partial result if failure was due
390 to the path not existing or not being accessible. */
391 if ((!failed || errno == ENOENT || errno == EACCES)
392 && dest - rname <= get_path_max ())
393 {
394 strcpy (resolved, rname);
395 if (failed)
396 return NULL;
397 else
398 return resolved;
399 }
400 if (!failed)
401 __set_errno (ENAMETOOLONG);
402 return NULL;
403 }
404 else
405 {
406 if (failed)
407 return NULL;
408 else
409 return __strdup (bufs->rname.data);
410 }
411 }
412
413 /* Return the canonical absolute name of file NAME. A canonical name
414 does not contain any ".", ".." components nor any repeated file name
415 separators ('/') or symlinks. All file name components must exist. If
416 RESOLVED is null, the result is malloc'd; otherwise, if the
417 canonical name is PATH_MAX chars or more, returns null with 'errno'
418 set to ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars,
419 returns the name in RESOLVED. If the name cannot be resolved and
420 RESOLVED is non-NULL, it contains the name of the first component
421 that cannot be resolved. If the name can be resolved, RESOLVED
422 holds the same value as the value returned. */
423
424 char *
425 __realpath (const char *name, char *resolved)
426 {
427 struct realpath_bufs bufs;
428 scratch_buffer_init (&bufs.rname);
429 scratch_buffer_init (&bufs.extra);
430 scratch_buffer_init (&bufs.link);
431 char *result = realpath_stk (name, resolved, &bufs);
432 scratch_buffer_free (&bufs.link);
433 scratch_buffer_free (&bufs.extra);
434 scratch_buffer_free (&bufs.rname);
435 return result;
436 }
437 libc_hidden_def (__realpath)
438 versioned_symbol (libc, __realpath, realpath, GLIBC_2_3);
439 #endif /* !FUNC_REALPATH_WORKS || defined _LIBC */
440
441
442 #if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_3)
443 char *
444 attribute_compat_text_section
445 __old_realpath (const char *name, char *resolved)
446 {
447 if (resolved == NULL)
448 {
449 __set_errno (EINVAL);
450 return NULL;
451 }
452
453 return __realpath (name, resolved);
454 }
455 compat_symbol (libc, __old_realpath, realpath, GLIBC_2_0);
456 #endif
457
458
459 char *
460 __canonicalize_file_name (const char *name)
461 {
462 return __realpath (name, NULL);
463 }
464 weak_alias (__canonicalize_file_name, canonicalize_file_name)