]> git.ipfire.org Git - thirdparty/tar.git/blame - lib/wordsplit.c
Silence gcc warnings in wordsplit
[thirdparty/tar.git] / lib / wordsplit.c
CommitLineData
7b5e8039 1/* wordsplit - a word splitter
c7b3f021 2 Copyright (C) 2009-2018 Sergey Poznyakoff
7b5e8039
SP
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
c7b3f021 15 with this program. If not, see <http://www.gnu.org/licenses/>. */
7b5e8039
SP
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <errno.h>
22#include <ctype.h>
23#include <unistd.h>
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <stdarg.h>
c7b3f021
SP
28#include <pwd.h>
29#include <glob.h>
7b5e8039
SP
30
31#if ENABLE_NLS
32# include <gettext.h>
33#else
34# define gettext(msgid) msgid
35#endif
36#define _(msgid) gettext (msgid)
37#define N_(msgid) msgid
38
e5474174
SP
39#ifndef FALLTHROUGH
40# if __GNUC__ < 7
41# define FALLTHROUGH ((void) 0)
42# else
43# define FALLTHROUGH __attribute__ ((__fallthrough__))
44# endif
45#endif
46
7b5e8039
SP
47#include <wordsplit.h>
48
/* ASCII-only, locale-independent character classification helpers.
   Every macro may evaluate its argument more than once, so do not pass
   expressions with side effects.  */

/* True if C is a blank, a tab or a newline.  */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
/* True if C occurs in the delimiter string of WS.  Note that strchr also
   matches the terminating null byte, so a C of 0 yields true.  */
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
/* True if C is an ASCII punctuation character (or 0, see ISDELIM).  */
#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
/* True if C is a hexadecimal digit: 0-9, a-f or A-F.  Range comparisons
   are used instead of strchr so that decimal digits are accepted and the
   null byte is rejected.  */
#define ISXDIGIT(c) \
  (ISDIGIT(c) \
   || ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'f') \
   || ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'F'))
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)

/* True if C may start, respectively continue, a variable name.  */
#define ISVARBEG(c) (ISALPHA(c) || (c) == '_')
#define ISVARCHR(c) (ISALNUM(c) || (c) == '_')

/* True if delimiter nodes must be generated for WSP: either the caller
   asked for delimiters to be returned, or a word limit is in effect.  */
#define WSP_RETURN_DELIMS(wsp) \
 ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))

/* Initial size of the word vector and the minimum increment used when
   it has to grow (both in elements).  */
#define ALLOC_INIT 128
#define ALLOC_INCR 128
69
70static void
71_wsplt_alloc_die (struct wordsplit *wsp)
72{
c7b3f021 73 wsp->ws_error ("%s", _("memory exhausted"));
7b5e8039
SP
74 abort ();
75}
76
55fb2fc3 77static void __WORDSPLIT_ATTRIBUTE_FORMAT ((__printf__, 1, 2))
7b5e8039
SP
78_wsplt_error (const char *fmt, ...)
79{
80 va_list ap;
81
82 va_start (ap, fmt);
83 vfprintf (stderr, fmt, ap);
84 va_end (ap);
85 fputc ('\n', stderr);
86}
87
88static void wordsplit_free_nodes (struct wordsplit *);
89
c7b3f021
SP
90static int
91_wsplt_seterr (struct wordsplit *wsp, int ec)
92{
93 wsp->ws_errno = ec;
94 if (wsp->ws_flags & WRDSF_SHOWERR)
95 wordsplit_perror (wsp);
96 return ec;
97}
98
7b5e8039
SP
99static int
100_wsplt_nomem (struct wordsplit *wsp)
101{
102 errno = ENOMEM;
103 wsp->ws_errno = WRDSE_NOSPACE;
104 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
105 wsp->ws_alloc_die (wsp);
106 if (wsp->ws_flags & WRDSF_SHOWERR)
107 wordsplit_perror (wsp);
108 if (!(wsp->ws_flags & WRDSF_REUSE))
109 wordsplit_free (wsp);
110 wordsplit_free_nodes (wsp);
111 return wsp->ws_errno;
112}
113
c7b3f021
SP
114static int wordsplit_run (const char *command, size_t length,
115 struct wordsplit *wsp,
e5474174 116 unsigned flags, int lvl);
c7b3f021
SP
117
118static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
e5474174 119 unsigned flags);
c7b3f021
SP
120static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
121static int wordsplit_finish (struct wordsplit *wsp);
122
123static int
124_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
125 char const *str, int len,
e5474174 126 unsigned flags, int finalize)
c7b3f021
SP
127{
128 int rc;
129
130 wss->ws_delim = wsp->ws_delim;
131 wss->ws_debug = wsp->ws_debug;
132 wss->ws_error = wsp->ws_error;
133 wss->ws_alloc_die = wsp->ws_alloc_die;
134
135 if (!(flags & WRDSF_NOVAR))
136 {
137 wss->ws_env = wsp->ws_env;
138 wss->ws_getvar = wsp->ws_getvar;
139 flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
140 }
141 if (!(flags & WRDSF_NOCMD))
142 {
143 wss->ws_command = wsp->ws_command;
144 }
145
146 if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
147 {
148 wss->ws_closure = wsp->ws_closure;
149 flags |= wsp->ws_flags & WRDSF_CLOSURE;
150 }
151
152 wss->ws_options = wsp->ws_options;
153
154 flags |= WRDSF_DELIM
155 | WRDSF_ALLOC_DIE
156 | WRDSF_ERROR
157 | WRDSF_DEBUG
158 | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
159
160 rc = wordsplit_init (wss, str, len, flags);
161 if (rc)
162 return rc;
163 wss->ws_lvl = wsp->ws_lvl + 1;
164 rc = wordsplit_process_list (wss, 0);
165 if (rc)
166 {
167 wordsplit_free_nodes (wss);
168 return rc;
169 }
170 if (finalize)
171 {
172 rc = wordsplit_finish (wss);
173 wordsplit_free_nodes (wss);
174 }
175 return rc;
176}
177
178static void
179_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
180{
181 if (wsp->ws_errno == WRDSE_USERERR)
182 free (wsp->ws_usererr);
183 wsp->ws_errno = wss->ws_errno;
184 if (wss->ws_errno == WRDSE_USERERR)
185 {
186 wsp->ws_usererr = wss->ws_usererr;
187 wss->ws_errno = WRDSE_EOF;
188 wss->ws_usererr = NULL;
189 }
190}
191
7b5e8039
SP
192static void
193wordsplit_init0 (struct wordsplit *wsp)
194{
195 if (wsp->ws_flags & WRDSF_REUSE)
196 {
197 if (!(wsp->ws_flags & WRDSF_APPEND))
198 wordsplit_free_words (wsp);
c7b3f021 199 wordsplit_clearerr (wsp);
7b5e8039
SP
200 }
201 else
202 {
203 wsp->ws_wordv = NULL;
204 wsp->ws_wordc = 0;
205 wsp->ws_wordn = 0;
206 }
207
208 wsp->ws_errno = 0;
7b5e8039
SP
209}
210
c7b3f021
SP
211char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
212
7b5e8039
SP
213static int
214wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
e5474174 215 unsigned flags)
7b5e8039
SP
216{
217 wsp->ws_flags = flags;
218
219 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
220 wsp->ws_alloc_die = _wsplt_alloc_die;
221 if (!(wsp->ws_flags & WRDSF_ERROR))
222 wsp->ws_error = _wsplt_error;
223
c7b3f021 224 if (!(wsp->ws_flags & WRDSF_NOVAR))
7b5e8039 225 {
c7b3f021
SP
226 /* These will be initialized on first variable assignment */
227 wsp->ws_envidx = wsp->ws_envsiz = 0;
228 wsp->ws_envbuf = NULL;
7b5e8039
SP
229 }
230
231 if (!(wsp->ws_flags & WRDSF_NOCMD))
232 {
c7b3f021
SP
233 if (!wsp->ws_command)
234 {
235 _wsplt_seterr (wsp, WRDSE_USAGE);
236 errno = EINVAL;
237 return wsp->ws_errno;
238 }
7b5e8039
SP
239 }
240
241 if (wsp->ws_flags & WRDSF_SHOWDBG)
242 {
243 if (!(wsp->ws_flags & WRDSF_DEBUG))
244 {
245 if (wsp->ws_flags & WRDSF_ERROR)
246 wsp->ws_debug = wsp->ws_error;
247 else if (wsp->ws_flags & WRDSF_SHOWERR)
248 wsp->ws_debug = _wsplt_error;
249 else
250 wsp->ws_flags &= ~WRDSF_SHOWDBG;
251 }
252 }
253
254 wsp->ws_input = input;
255 wsp->ws_len = len;
256
257 if (!(wsp->ws_flags & WRDSF_DOOFFS))
258 wsp->ws_offs = 0;
259
260 if (!(wsp->ws_flags & WRDSF_DELIM))
261 wsp->ws_delim = " \t\n";
262
263 if (!(wsp->ws_flags & WRDSF_COMMENT))
264 wsp->ws_comment = NULL;
265
266 if (!(wsp->ws_flags & WRDSF_CLOSURE))
267 wsp->ws_closure = NULL;
268
c7b3f021
SP
269 if (!(wsp->ws_flags & WRDSF_OPTIONS))
270 wsp->ws_options = 0;
271
272 if (wsp->ws_flags & WRDSF_ESCAPE)
273 {
274 if (!wsp->ws_escape[WRDSX_WORD])
275 wsp->ws_escape[WRDSX_WORD] = "";
276 if (!wsp->ws_escape[WRDSX_QUOTE])
277 wsp->ws_escape[WRDSX_QUOTE] = "";
278 }
279 else
280 {
281 if (wsp->ws_flags & WRDSF_CESCAPES)
282 {
283 wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
284 wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
285 wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
286 | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
287 }
288 else
289 {
290 wsp->ws_escape[WRDSX_WORD] = "";
291 wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
292 wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
293 }
294 }
295
7b5e8039 296 wsp->ws_endp = 0;
c7b3f021 297 wsp->ws_wordi = 0;
7b5e8039 298
c7b3f021
SP
299 if (wsp->ws_flags & WRDSF_REUSE)
300 wordsplit_free_nodes (wsp);
301 wsp->ws_head = wsp->ws_tail = NULL;
302
7b5e8039 303 wordsplit_init0 (wsp);
c7b3f021 304
7b5e8039
SP
305 return 0;
306}
307
308static int
309alloc_space (struct wordsplit *wsp, size_t count)
310{
311 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
312 char **ptr;
313 size_t newalloc;
314
315 if (wsp->ws_wordv == NULL)
316 {
317 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
318 ptr = calloc (newalloc, sizeof (ptr[0]));
319 }
320 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
321 {
322 newalloc = offs + wsp->ws_wordc +
323 (count > ALLOC_INCR ? count : ALLOC_INCR);
324 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
325 }
326 else
327 return 0;
328
329 if (ptr)
330 {
331 wsp->ws_wordn = newalloc;
332 wsp->ws_wordv = ptr;
333 }
334 else
335 return _wsplt_nomem (wsp);
336 return 0;
337}
338\f
339
/* Flags describing the state of a node (struct wordsplit_node.flags).  */
#define _WSNF_NULL     0x0001   /* empty node; contributes no text */
#define _WSNF_WORD     0x0002   /* text is the allocated string v.word */
#define _WSNF_QUOTE    0x0004   /* text is quoted */
#define _WSNF_NOEXPAND 0x0008   /* text is not subject to expansion */
#define _WSNF_JOIN     0x0010   /* node must be joined with the next one */
#define _WSNF_SEXP     0x0020   /* text is a sed expression */
#define _WSNF_DELIM    0x0040   /* node is a delimiter */

/* Not a node state: passed to wordsplit_add_segm to request that the
   segment be added even if it is empty.  */
#define _WSNF_EMPTYOK  0x0100
352
/* An element of the doubly-linked list of text pieces that is built while
   the input is being processed.  The text is either a segment of the input
   string (v.segm) or, if _WSNF_WORD is set in flags, a separately
   allocated string (v.word).  */
struct wordsplit_node
{
  struct wordsplit_node *prev;  /* preceding node, NULL for the head */
  struct wordsplit_node *next;  /* following node, NULL for the tail */
  unsigned flags;               /* bitwise OR of _WSNF_ flags */
  union
  {
    struct
    {
      size_t beg;               /* offset of the first byte in ws_input */
      size_t end;               /* offset past the last byte in ws_input */
    } segm;
    char *word;                 /* allocated text (with _WSNF_WORD) */
  } v;
};
368
369static const char *
e5474174 370wsnode_flagstr (unsigned flags)
7b5e8039 371{
c7b3f021 372 static char retbuf[7];
7b5e8039
SP
373 char *p = retbuf;
374
375 if (flags & _WSNF_WORD)
376 *p++ = 'w';
377 else if (flags & _WSNF_NULL)
378 *p++ = 'n';
379 else
380 *p++ = '-';
381 if (flags & _WSNF_QUOTE)
382 *p++ = 'q';
383 else
384 *p++ = '-';
385 if (flags & _WSNF_NOEXPAND)
386 *p++ = 'E';
387 else
388 *p++ = '-';
389 if (flags & _WSNF_JOIN)
390 *p++ = 'j';
391 else
392 *p++ = '-';
393 if (flags & _WSNF_SEXP)
394 *p++ = 's';
395 else
396 *p++ = '-';
c7b3f021
SP
397 if (flags & _WSNF_DELIM)
398 *p++ = 'd';
399 else
400 *p++ = '-';
7b5e8039
SP
401 *p = 0;
402 return retbuf;
403}
404
405static const char *
406wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
407{
408 if (p->flags & _WSNF_NULL)
409 return "";
410 else if (p->flags & _WSNF_WORD)
411 return p->v.word;
412 else
413 return wsp->ws_input + p->v.segm.beg;
414}
415
416static size_t
417wsnode_len (struct wordsplit_node *p)
418{
419 if (p->flags & _WSNF_NULL)
420 return 0;
421 else if (p->flags & _WSNF_WORD)
422 return strlen (p->v.word);
423 else
424 return p->v.segm.end - p->v.segm.beg;
425}
426
427static int
428wsnode_new (struct wordsplit *wsp, struct wordsplit_node **pnode)
429{
430 struct wordsplit_node *node = calloc (1, sizeof (*node));
431 if (!node)
432 return _wsplt_nomem (wsp);
433 *pnode = node;
434 return 0;
435}
436
437static void
438wsnode_free (struct wordsplit_node *p)
439{
440 if (p->flags & _WSNF_WORD)
441 free (p->v.word);
442 free (p);
443}
444
445static void
446wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
447{
448 node->next = NULL;
449 node->prev = wsp->ws_tail;
450 if (wsp->ws_tail)
451 wsp->ws_tail->next = node;
452 else
453 wsp->ws_head = node;
454 wsp->ws_tail = node;
455}
456
457static void
458wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
459{
460 struct wordsplit_node *p;
461
462 p = node->prev;
463 if (p)
464 {
465 p->next = node->next;
466 if (!node->next)
467 p->flags &= ~_WSNF_JOIN;
468 }
469 else
470 wsp->ws_head = node->next;
471
472 p = node->next;
473 if (p)
474 p->prev = node->prev;
475 else
476 wsp->ws_tail = node->prev;
477
478 node->next = node->prev = NULL;
479}
480
c7b3f021
SP
481static struct wordsplit_node *
482wsnode_tail (struct wordsplit_node *p)
483{
484 while (p && p->next)
485 p = p->next;
486 return p;
487}
488
7b5e8039
SP
489static void
490wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
491 struct wordsplit_node *anchor, int before)
492{
493 if (!wsp->ws_head)
494 {
495 node->next = node->prev = NULL;
496 wsp->ws_head = wsp->ws_tail = node;
497 }
498 else if (before)
499 {
500 if (anchor->prev)
501 wsnode_insert (wsp, node, anchor->prev, 0);
502 else
503 {
c7b3f021 504 struct wordsplit_node *tail = wsnode_tail (node);
7b5e8039 505 node->prev = NULL;
c7b3f021
SP
506 tail->next = anchor;
507 anchor->prev = tail;
7b5e8039
SP
508 wsp->ws_head = node;
509 }
510 }
511 else
512 {
513 struct wordsplit_node *p;
c7b3f021 514 struct wordsplit_node *tail = wsnode_tail (node);
7b5e8039
SP
515
516 p = anchor->next;
517 if (p)
c7b3f021 518 p->prev = tail;
7b5e8039 519 else
c7b3f021
SP
520 wsp->ws_tail = tail;
521 tail->next = p;
7b5e8039
SP
522 node->prev = anchor;
523 anchor->next = node;
524 }
525}
526
527static int
528wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
529{
530 struct wordsplit_node *node;
531 int rc;
532
533 if (end == beg && !(flg & _WSNF_EMPTYOK))
534 return 0;
535 rc = wsnode_new (wsp, &node);
536 if (rc)
537 return rc;
538 node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
539 node->v.segm.beg = beg;
540 node->v.segm.end = end;
541 wsnode_append (wsp, node);
542 return 0;
543}
544
545static void
546wordsplit_free_nodes (struct wordsplit *wsp)
547{
548 struct wordsplit_node *p;
549
550 for (p = wsp->ws_head; p;)
551 {
552 struct wordsplit_node *next = p->next;
553 wsnode_free (p);
554 p = next;
555 }
556 wsp->ws_head = wsp->ws_tail = NULL;
557}
558
559static void
560wordsplit_dump_nodes (struct wordsplit *wsp)
561{
562 struct wordsplit_node *p;
563 int n = 0;
564
565 for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
566 {
567 if (p->flags & _WSNF_WORD)
c7b3f021
SP
568 wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
569 wsp->ws_lvl,
7b5e8039
SP
570 n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
571 else
c7b3f021
SP
572 wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
573 wsp->ws_lvl,
7b5e8039
SP
574 n, p, p->flags, wsnode_flagstr (p->flags),
575 (int) (p->v.segm.end - p->v.segm.beg),
576 wsp->ws_input + p->v.segm.beg);
577 }
578}
579
580static int
581coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
582{
583 struct wordsplit_node *p, *end;
584 size_t len = 0;
585 char *buf, *cur;
586 int stop;
587
c7b3f021
SP
588 if (!(node->flags & _WSNF_JOIN))
589 return 0;
590
7b5e8039
SP
591 for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
592 {
593 len += wsnode_len (p);
594 }
c7b3f021
SP
595 if (p)
596 len += wsnode_len (p);
7b5e8039
SP
597 end = p;
598
599 buf = malloc (len + 1);
600 if (!buf)
601 return _wsplt_nomem (wsp);
602 cur = buf;
603
604 p = node;
605 for (stop = 0; !stop;)
606 {
607 struct wordsplit_node *next = p->next;
608 const char *str = wsnode_ptr (wsp, p);
609 size_t slen = wsnode_len (p);
610
611 memcpy (cur, str, slen);
612 cur += slen;
613 if (p != node)
614 {
c7b3f021 615 node->flags |= p->flags & _WSNF_QUOTE;
7b5e8039
SP
616 wsnode_remove (wsp, p);
617 stop = p == end;
618 wsnode_free (p);
619 }
620 p = next;
621 }
622
623 *cur = 0;
624
625 node->flags &= ~_WSNF_JOIN;
626
627 if (node->flags & _WSNF_WORD)
628 free (node->v.word);
629 else
630 node->flags |= _WSNF_WORD;
631 node->v.word = buf;
632 return 0;
633}
634
c7b3f021
SP
635static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
636 char *dst, const char *src,
637 size_t n);
638
7b5e8039
SP
639static int
640wsnode_quoteremoval (struct wordsplit *wsp)
641{
642 struct wordsplit_node *p;
7b5e8039
SP
643
644 for (p = wsp->ws_head; p; p = p->next)
645 {
646 const char *str = wsnode_ptr (wsp, p);
647 size_t slen = wsnode_len (p);
648 int unquote;
649
650 if (wsp->ws_flags & WRDSF_QUOTE)
c7b3f021 651 unquote = !(p->flags & _WSNF_NOEXPAND);
7b5e8039
SP
652 else
653 unquote = 0;
654
655 if (unquote)
656 {
657 if (!(p->flags & _WSNF_WORD))
658 {
659 char *newstr = malloc (slen + 1);
660 if (!newstr)
661 return _wsplt_nomem (wsp);
662 memcpy (newstr, str, slen);
663 newstr[slen] = 0;
664 p->v.word = newstr;
665 p->flags |= _WSNF_WORD;
666 }
667
c7b3f021
SP
668 wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
669 p->v.word, str, slen);
7b5e8039
SP
670 }
671 }
672 return 0;
673}
674
675static int
676wsnode_coalesce (struct wordsplit *wsp)
677{
678 struct wordsplit_node *p;
679
680 for (p = wsp->ws_head; p; p = p->next)
681 {
682 if (p->flags & _WSNF_JOIN)
683 if (coalesce_segment (wsp, p))
684 return 1;
685 }
686 return 0;
687}
688
c7b3f021
SP
689static int
690wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
691{
692 if (p->next)
693 {
694 struct wordsplit_node *np = p;
695 while (np && np->next)
696 {
697 np->flags |= _WSNF_JOIN;
698 np = np->next;
699 }
700 if (coalesce_segment (wsp, p))
701 return 1;
702 }
703 return 0;
704}
705
706static size_t skip_delim (struct wordsplit *wsp);
707
7b5e8039
SP
708static int
709wordsplit_finish (struct wordsplit *wsp)
710{
711 struct wordsplit_node *p;
712 size_t n;
c7b3f021 713 int delim;
7b5e8039 714
c7b3f021
SP
715 /* Postprocess delimiters. It would be rather simple, if it weren't for
716 the incremental operation.
7b5e8039 717
c7b3f021
SP
718 Nodes of type _WSNF_DELIM get inserted to the node list if either
719 WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
720
721 The following cases should be distinguished:
722
723 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
724 any runs of similar delimiter nodes to a single node. The nodes are
725 'similar' if they point to the same delimiter character.
726
727 If WRDSO_MAXWORDS option is set, stop compressing when
728 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
729 a single last node.
730
731 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
732 remove any delimiter nodes. Stop operation when
733 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
734 a single last node.
735
736 3. If incremental operation is in progress, restart the loop any time
737 a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
738 is set.
739 */
740 again:
741 delim = 0; /* Delimiter being processed (if any) */
742 n = 0; /* Number of words processed so far */
743 p = wsp->ws_head; /* Current node */
744
745 while (p)
746 {
747 struct wordsplit_node *next = p->next;
748 if (p->flags & _WSNF_DELIM)
749 {
750 if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
751 {
752 if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
753 {
754 char const *s = wsnode_ptr (wsp, p);
755 if (delim)
756 {
757 if (delim == *s)
758 {
759 wsnode_remove (wsp, p);
760 p = next;
761 continue;
762 }
763 else
764 {
765 delim = 0;
766 n++; /* Count this node; it will be returned */
767 }
768 }
769 else
770 {
771 delim = *s;
772 p = next;
773 continue;
774 }
775 }
776 }
777 else if (wsp->ws_options & WRDSO_MAXWORDS)
778 {
779 wsnode_remove (wsp, p);
780 p = next;
781 continue;
782 }
783 }
784 else
785 {
786 if (delim)
787 {
788 /* Last node was a delimiter or a compressed run of delimiters;
789 Count it, and clear the delimiter marker */
790 n++;
791 delim = 0;
792 }
793 if (wsp->ws_options & WRDSO_MAXWORDS)
794 {
795 if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
796 break;
797 }
798 }
799 n++;
800 if (wsp->ws_flags & WRDSF_INCREMENTAL)
801 p = NULL; /* Break the loop */
802 else
803 p = next;
804 }
805
806 if (p)
807 {
808 /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
809 words have already been collected. Reconstruct a single final
810 node from the remaining nodes. */
811 if (wsnode_tail_coalesce (wsp, p))
812 return wsp->ws_errno;
813 n++;
814 }
815
816 if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL))
817 {
818 /* The loop above have eliminated all nodes. Restart the
819 processing, if there's any input left. */
820 if (wsp->ws_endp < wsp->ws_len)
821 {
822 int rc;
823 if (wsp->ws_flags & WRDSF_SHOWDBG)
824 wsp->ws_debug (_("Restarting"));
825 rc = wordsplit_process_list (wsp, skip_delim (wsp));
826 if (rc)
827 return rc;
828 }
829 else
830 {
831 wsp->ws_error = WRDSE_EOF;
832 return WRDSE_EOF;
833 }
834 goto again;
835 }
7b5e8039
SP
836
837 if (alloc_space (wsp, n + 1))
c7b3f021 838 return wsp->ws_errno;
7b5e8039 839
c7b3f021 840 while (wsp->ws_head)
7b5e8039 841 {
c7b3f021
SP
842 const char *str = wsnode_ptr (wsp, wsp->ws_head);
843 size_t slen = wsnode_len (wsp->ws_head);
7b5e8039
SP
844 char *newstr = malloc (slen + 1);
845
846 /* Assign newstr first, even if it is NULL. This way
847 wordsplit_free will work even if we return
848 nomem later. */
849 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
850 if (!newstr)
851 return _wsplt_nomem (wsp);
852 memcpy (newstr, str, slen);
853 newstr[slen] = 0;
854
c7b3f021
SP
855 wsnode_remove (wsp, wsp->ws_head);
856
7b5e8039 857 wsp->ws_wordc++;
c7b3f021 858 wsp->ws_wordi++;
7b5e8039 859
c7b3f021
SP
860 if (wsp->ws_flags & WRDSF_INCREMENTAL)
861 break;
7b5e8039
SP
862 }
863 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
864 return 0;
865}
866\f
c7b3f021
SP
867int
868wordsplit_append (wordsplit_t *wsp, int argc, char **argv)
869{
870 int rc;
871 size_t i;
7b5e8039 872
c7b3f021
SP
873 rc = alloc_space (wsp, wsp->ws_wordc + argc + 1);
874 if (rc)
875 return rc;
876 for (i = 0; i < argc; i++)
877 {
878 char *newstr = strdup (argv[i]);
879 if (!newstr)
880 {
881 while (i > 0)
882 {
883 free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1]);
884 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1] = NULL;
885 i--;
886 }
887 return _wsplt_nomem (wsp);
888 }
889 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i] = newstr;
890 }
891 wsp->ws_wordc += i;
892 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
893 return 0;
894}
895\f
7b5e8039
SP
896/* Variable expansion */
897static int
898node_split_prefix (struct wordsplit *wsp,
899 struct wordsplit_node **ptail,
900 struct wordsplit_node *node,
901 size_t beg, size_t len, int flg)
902{
903 struct wordsplit_node *newnode;
904
905 if (len == 0)
906 return 0;
907 if (wsnode_new (wsp, &newnode))
908 return 1;
909 wsnode_insert (wsp, newnode, *ptail, 0);
910 if (node->flags & _WSNF_WORD)
911 {
912 const char *str = wsnode_ptr (wsp, node);
913 char *newstr = malloc (len + 1);
914 if (!newstr)
915 return _wsplt_nomem (wsp);
916 memcpy (newstr, str + beg, len);
917 newstr[len] = 0;
918 newnode->flags = _WSNF_WORD;
919 newnode->v.word = newstr;
920 }
921 else
922 {
923 newnode->v.segm.beg = node->v.segm.beg + beg;
924 newnode->v.segm.end = newnode->v.segm.beg + len;
925 }
926 newnode->flags |= flg;
927 *ptail = newnode;
928 return 0;
929}
930
/* Find the closing bracket that matches an opening one assumed to precede
   position I in STR.  LEN is the length of STR.  PAREN is a two-character
   string: PAREN[0] is the opening and PAREN[1] the closing bracket.

   Nested bracket pairs are skipped.  Brackets within single or double
   quotes are ignored; within double quotes a backslash escapes the
   character that follows it.

   On success stores the offset of the closing bracket in *POFF and returns
   0.  Returns 1 if no matching bracket is found.  */
static int
find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
                    char const *paren)
{
  enum { st_init, st_squote, st_dquote } state = st_init;
  size_t depth = 1;

  while (i < len)
    {
      char const c = str[i];

      if (state == st_squote)
        {
          if (c == '\'')
            state = st_init;
        }
      else if (state == st_dquote)
        {
          if (c == '\\')
            i++;                /* skip the escaped character */
          else if (c == '"')
            state = st_init;
        }
      else if (c == '"')
        state = st_dquote;
      else if (c == '\'')
        state = st_squote;
      else if (c == paren[0])
        depth++;
      else if (c == paren[1] && --depth == 0)
        {
          *poff = i;
          return 0;
        }
      i++;
    }
  return 1;
}
987
c7b3f021
SP
988static int
989wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len,
990 char const **ret)
7b5e8039
SP
991{
992 size_t i;
993
994 if (!(wsp->ws_flags & WRDSF_ENV))
c7b3f021 995 return WRDSE_UNDEF;
7b5e8039
SP
996
997 if (wsp->ws_flags & WRDSF_ENV_KV)
998 {
999 /* A key-value pair environment */
1000 for (i = 0; wsp->ws_env[i]; i++)
1001 {
1002 size_t elen = strlen (wsp->ws_env[i]);
1003 if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
c7b3f021
SP
1004 {
1005 *ret = wsp->ws_env[i + 1];
1006 return WRDSE_OK;
1007 }
7b5e8039
SP
1008 /* Skip the value. Break the loop if it is NULL. */
1009 i++;
1010 if (wsp->ws_env[i] == NULL)
1011 break;
1012 }
1013 }
c7b3f021 1014 else if (wsp->ws_env)
7b5e8039
SP
1015 {
1016 /* Usual (A=B) environment. */
1017 for (i = 0; wsp->ws_env[i]; i++)
1018 {
1019 size_t j;
1020 const char *var = wsp->ws_env[i];
1021
1022 for (j = 0; j < len; j++)
1023 if (name[j] != var[j])
1024 break;
1025 if (j == len && var[j] == '=')
c7b3f021
SP
1026 {
1027 *ret = var + j + 1;
1028 return WRDSE_OK;
1029 }
1030 }
1031 }
1032 return WRDSE_UNDEF;
1033}
1034
1035static int
1036wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
1037 char *value)
1038{
1039 int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
1040 char *v;
1041
1042 if (wsp->ws_envidx + n >= wsp->ws_envsiz)
1043 {
1044 size_t sz;
1045 char **newenv;
1046
1047 if (!wsp->ws_envbuf)
1048 {
1049 if (wsp->ws_flags & WRDSF_ENV)
1050 {
1051 size_t i = 0, j;
1052
1053 if (wsp->ws_env)
1054 {
1055 for (; wsp->ws_env[i]; i++)
1056 ;
1057 }
1058
1059 sz = i + n + 1;
1060
1061 newenv = calloc (sz, sizeof(newenv[0]));
1062 if (!newenv)
1063 return _wsplt_nomem (wsp);
1064
1065 for (j = 0; j < i; j++)
1066 {
1067 newenv[j] = strdup (wsp->ws_env[j]);
1068 if (!newenv[j])
1069 {
1070 for (; j > 1; j--)
1071 free (newenv[j-1]);
1072 free (newenv[j-1]);
1073 return _wsplt_nomem (wsp);
1074 }
1075 }
1076 newenv[j] = NULL;
1077
1078 wsp->ws_envbuf = newenv;
1079 wsp->ws_envidx = i;
1080 wsp->ws_envsiz = sz;
1081 wsp->ws_env = (const char**) wsp->ws_envbuf;
1082 }
1083 else
1084 {
1085 newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0]));
1086 if (!newenv)
1087 return _wsplt_nomem (wsp);
1088 wsp->ws_envbuf = newenv;
1089 wsp->ws_envidx = 0;
1090 wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
1091 wsp->ws_env = (const char**) wsp->ws_envbuf;
1092 wsp->ws_flags |= WRDSF_ENV;
1093 }
1094 }
1095 else
1096 {
1097 wsp->ws_envsiz *= 2;
1098 newenv = realloc (wsp->ws_envbuf,
1099 wsp->ws_envsiz * sizeof (wsp->ws_envbuf[0]));
1100 if (!newenv)
1101 return _wsplt_nomem (wsp);
1102 wsp->ws_envbuf = newenv;
1103 wsp->ws_env = (const char**) wsp->ws_envbuf;
1104 }
1105 }
1106
1107 if (wsp->ws_flags & WRDSF_ENV_KV)
1108 {
1109 /* A key-value pair environment */
1110 char *p = malloc (namelen + 1);
1111 if (!p)
1112 return _wsplt_nomem (wsp);
1113 memcpy (p, name, namelen);
1114 p[namelen] = 0;
1115
1116 v = strdup (value);
1117 if (!v)
1118 {
1119 free (p);
1120 return _wsplt_nomem (wsp);
7b5e8039 1121 }
c7b3f021
SP
1122 wsp->ws_env[wsp->ws_envidx++] = p;
1123 wsp->ws_env[wsp->ws_envidx++] = v;
7b5e8039 1124 }
c7b3f021
SP
1125 else
1126 {
1127 v = malloc (namelen + strlen(value) + 2);
1128 if (!v)
1129 return _wsplt_nomem (wsp);
1130 memcpy (v, name, namelen);
1131 v[namelen++] = '=';
1132 strcpy(v + namelen, value);
1133 wsp->ws_env[wsp->ws_envidx++] = v;
1134 }
1135 wsp->ws_env[wsp->ws_envidx++] = NULL;
1136 return WRDSE_OK;
7b5e8039
SP
1137}
1138
1139static int
1140expvar (struct wordsplit *wsp, const char *str, size_t len,
e5474174 1141 struct wordsplit_node **ptail, const char **pend, unsigned flg)
7b5e8039
SP
1142{
1143 size_t i = 0;
1144 const char *defstr = NULL;
c7b3f021 1145 char *value;
7b5e8039
SP
1146 const char *vptr;
1147 struct wordsplit_node *newnode;
1148 const char *start = str - 1;
c7b3f021
SP
1149 int rc;
1150 struct wordsplit ws;
1151
1152 if (ISVARBEG (str[0]))
7b5e8039
SP
1153 {
1154 for (i = 1; i < len; i++)
c7b3f021 1155 if (!ISVARCHR (str[i]))
7b5e8039
SP
1156 break;
1157 *pend = str + i - 1;
1158 }
1159 else if (str[0] == '{')
1160 {
1161 str++;
1162 len--;
1163 for (i = 1; i < len; i++)
7b5e8039 1164 {
c7b3f021 1165 if (str[i] == ':')
7b5e8039 1166 {
c7b3f021
SP
1167 size_t j;
1168
1169 defstr = str + i + 1;
1170 if (find_closing_paren (str, i + 1, len, &j, "{}"))
1171 return _wsplt_seterr (wsp, WRDSE_CBRACE);
1172 *pend = str + j;
1173 break;
1174 }
1175 else if (str[i] == '}')
1176 {
1177 defstr = NULL;
1178 *pend = str + i;
1179 break;
1180 }
1181 else if (strchr ("-+?=", str[i]))
1182 {
1183 size_t j;
1184
1185 defstr = str + i;
1186 if (find_closing_paren (str, i, len, &j, "{}"))
1187 return _wsplt_seterr (wsp, WRDSE_CBRACE);
1188 *pend = str + j;
1189 break;
7b5e8039 1190 }
7b5e8039 1191 }
c7b3f021
SP
1192 if (i == len)
1193 return _wsplt_seterr (wsp, WRDSE_CBRACE);
7b5e8039
SP
1194 }
1195 else
1196 {
1197 if (wsnode_new (wsp, &newnode))
1198 return 1;
1199 wsnode_insert (wsp, newnode, *ptail, 0);
1200 *ptail = newnode;
1201 newnode->flags = _WSNF_WORD | flg;
1202 newnode->v.word = malloc (3);
1203 if (!newnode->v.word)
1204 return _wsplt_nomem (wsp);
1205 newnode->v.word[0] = '$';
1206 newnode->v.word[1] = str[0];
1207 newnode->v.word[2] = 0;
1208 *pend = str;
1209 return 0;
1210 }
1211
1212 /* Actually expand the variable */
1213 /* str - start of the variable name
1214 i - its length
1215 defstr - default replacement str */
1216
c7b3f021 1217 if (defstr && strchr("-+?=", defstr[0]) == 0)
7b5e8039 1218 {
c7b3f021
SP
1219 rc = WRDSE_UNDEF;
1220 defstr = NULL;
7b5e8039
SP
1221 }
1222 else
1223 {
c7b3f021
SP
1224 rc = wordsplit_find_env (wsp, str, i, &vptr);
1225 if (rc == WRDSE_OK)
1226 {
1227 if (vptr)
1228 {
1229 value = strdup (vptr);
1230 if (!value)
1231 rc = WRDSE_NOSPACE;
1232 }
1233 else
1234 rc = WRDSE_UNDEF;
1235 }
1236 else if (wsp->ws_flags & WRDSF_GETVAR)
1237 rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
7b5e8039 1238 else
c7b3f021 1239 rc = WRDSE_UNDEF;
17f99bc6 1240
c7b3f021
SP
1241 if (rc == WRDSE_OK
1242 && (!value || value[0] == 0)
1243 && defstr && defstr[-1] == ':')
1244 {
1245 free (value);
1246 rc = WRDSE_UNDEF;
1247 }
1248 }
1249
1250 switch (rc)
7b5e8039 1251 {
c7b3f021
SP
1252 case WRDSE_OK:
1253 if (defstr && *defstr == '+')
7b5e8039 1254 {
c7b3f021
SP
1255 size_t size = *pend - ++defstr;
1256
1257 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1258 WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
1259 (wsp->ws_flags &
1260 (WRDSF_NOVAR | WRDSF_NOCMD)), 1);
1261 if (rc)
1262 return rc;
1263 free (value);
1264 value = ws.ws_wordv[0];
1265 ws.ws_wordv[0] = NULL;
1266 wordsplit_free (&ws);
7b5e8039 1267 }
c7b3f021
SP
1268 break;
1269
1270 case WRDSE_UNDEF:
1271 if (defstr)
7b5e8039 1272 {
c7b3f021
SP
1273 size_t size;
1274 if (*defstr == '-' || *defstr == '=')
1275 {
1276 size = *pend - ++defstr;
1277
1278 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1279 WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
1280 (wsp->ws_flags &
1281 (WRDSF_NOVAR | WRDSF_NOCMD)),
1282 1);
1283 if (rc)
1284 return rc;
1285
1286 value = ws.ws_wordv[0];
1287 ws.ws_wordv[0] = NULL;
1288 wordsplit_free (&ws);
1289
1290 if (defstr[-1] == '=')
1291 wsplt_assign_var (wsp, str, i, value);
1292 }
1293 else
1294 {
1295 if (*defstr == '?')
1296 {
1297 size = *pend - ++defstr;
1298 if (size == 0)
1299 wsp->ws_error (_("%.*s: variable null or not set"),
1300 (int) i, str);
1301 else
1302 {
1303 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1304 WRDSF_NOSPLIT | WRDSF_WS |
1305 WRDSF_QUOTE |
1306 (wsp->ws_flags &
1307 (WRDSF_NOVAR | WRDSF_NOCMD)),
1308 1);
1309 if (rc == 0)
1310 wsp->ws_error ("%.*s: %s",
1311 (int) i, str, ws.ws_wordv[0]);
1312 else
1313 wsp->ws_error ("%.*s: %.*s",
1314 (int) i, str, (int) size, defstr);
1315 wordsplit_free (&ws);
1316 }
1317 }
1318 value = NULL;
1319 }
1320 }
1321 else if (wsp->ws_flags & WRDSF_UNDEF)
1322 {
1323 _wsplt_seterr (wsp, WRDSE_UNDEF);
1324 return 1;
1325 }
1326 else
1327 {
1328 if (wsp->ws_flags & WRDSF_WARNUNDEF)
1329 wsp->ws_error (_("warning: undefined variable `%.*s'"),
1330 (int) i, str);
1331 if (wsp->ws_flags & WRDSF_KEEPUNDEF)
1332 value = NULL;
1333 else
1334 {
1335 value = strdup ("");
1336 if (!value)
1337 return _wsplt_nomem (wsp);
1338 }
1339 }
1340 break;
1341
1342 case WRDSE_NOSPACE:
1343 return _wsplt_nomem (wsp);
1344
1345 case WRDSE_USERERR:
1346 if (wsp->ws_errno == WRDSE_USERERR)
1347 free (wsp->ws_usererr);
1348 wsp->ws_usererr = value;
e5474174 1349 FALLTHROUGH;
c7b3f021
SP
1350 default:
1351 _wsplt_seterr (wsp, rc);
1352 return 1;
1353 }
1354
1355 if (value)
1356 {
1357 if (flg & _WSNF_QUOTE)
1358 {
1359 if (wsnode_new (wsp, &newnode))
1360 return 1;
1361 wsnode_insert (wsp, newnode, *ptail, 0);
1362 *ptail = newnode;
1363 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1364 newnode->v.word = value;
1365 }
1366 else if (*value == 0)
1367 {
1368 free (value);
1369 /* Empty string is a special case */
1370 if (wsnode_new (wsp, &newnode))
1371 return 1;
7b5e8039
SP
1372 wsnode_insert (wsp, newnode, *ptail, 0);
1373 *ptail = newnode;
1374 newnode->flags = _WSNF_NULL;
1375 }
1376 else
1377 {
1378 struct wordsplit ws;
c7b3f021
SP
1379 int rc;
1380
1381 rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
1382 WRDSF_NOVAR | WRDSF_NOCMD |
1383 WRDSF_QUOTE
1384 | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) ,
1385 0);
1386 free (value);
1387 if (rc)
7b5e8039 1388 {
c7b3f021 1389 _wsplt_seterr_sub (wsp, &ws);
7b5e8039
SP
1390 wordsplit_free (&ws);
1391 return 1;
1392 }
c7b3f021
SP
1393 wsnode_insert (wsp, ws.ws_head, *ptail, 0);
1394 *ptail = ws.ws_tail;
1395 ws.ws_head = ws.ws_tail = NULL;
7b5e8039
SP
1396 wordsplit_free (&ws);
1397 }
1398 }
1399 else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
1400 {
1401 size_t size = *pend - start + 1;
1402
1403 if (wsnode_new (wsp, &newnode))
1404 return 1;
1405 wsnode_insert (wsp, newnode, *ptail, 0);
1406 *ptail = newnode;
1407 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1408 newnode->v.word = malloc (size + 1);
1409 if (!newnode->v.word)
1410 return _wsplt_nomem (wsp);
1411 memcpy (newnode->v.word, start, size);
1412 newnode->v.word[size] = 0;
1413 }
1414 else
1415 {
1416 if (wsnode_new (wsp, &newnode))
1417 return 1;
1418 wsnode_insert (wsp, newnode, *ptail, 0);
1419 *ptail = newnode;
1420 newnode->flags = _WSNF_NULL;
1421 }
1422 return 0;
1423}
1424
/* Predicate for node_expand: return 1 if C, the character following
   a `$', can start a variable reference, i.e. it is an opening curly
   brace or satisfies ISVARBEG.  Return 0 otherwise. */
static int
begin_var_p (int c)
{
  if (c == '{')
    return 1;
  return ISVARBEG (c) ? 1 : 0;
}
1430
1431static int
1432node_expand (struct wordsplit *wsp, struct wordsplit_node *node,
1433 int (*beg_p) (int),
1434 int (*ws_exp_fn) (struct wordsplit *wsp,
1435 const char *str, size_t len,
1436 struct wordsplit_node **ptail,
1437 const char **pend,
e5474174 1438 unsigned flg))
7b5e8039
SP
1439{
1440 const char *str = wsnode_ptr (wsp, node);
1441 size_t slen = wsnode_len (node);
1442 const char *end = str + slen;
1443 const char *p;
1444 size_t off = 0;
1445 struct wordsplit_node *tail = node;
1446
1447 for (p = str; p < end; p++)
1448 {
1449 if (*p == '\\')
1450 {
1451 p++;
1452 continue;
1453 }
c7b3f021 1454 if (*p == '$' && beg_p (p[1]))
7b5e8039
SP
1455 {
1456 size_t n = p - str;
1457
1458 if (tail != node)
1459 tail->flags |= _WSNF_JOIN;
1460 if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
1461 return 1;
1462 p++;
c7b3f021
SP
1463 if (ws_exp_fn (wsp, p, slen - n, &tail, &p,
1464 node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
7b5e8039
SP
1465 return 1;
1466 off += p - str + 1;
1467 str = p + 1;
1468 }
1469 }
1470 if (p > str)
1471 {
1472 if (tail != node)
1473 tail->flags |= _WSNF_JOIN;
1474 if (node_split_prefix (wsp, &tail, node, off, p - str,
c7b3f021 1475 node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
7b5e8039
SP
1476 return 1;
1477 }
1478 if (tail != node)
1479 {
1480 wsnode_remove (wsp, node);
1481 wsnode_free (node);
1482 }
1483 return 0;
1484}
c7b3f021
SP
1485
1486/* Remove NULL nodes from the list */
7b5e8039
SP
1487static void
1488wsnode_nullelim (struct wordsplit *wsp)
1489{
1490 struct wordsplit_node *p;
1491
1492 for (p = wsp->ws_head; p;)
1493 {
1494 struct wordsplit_node *next = p->next;
c7b3f021
SP
1495 if (p->flags & _WSNF_DELIM && p->prev)
1496 p->prev->flags &= ~_WSNF_JOIN;
7b5e8039
SP
1497 if (p->flags & _WSNF_NULL)
1498 {
1499 wsnode_remove (wsp, p);
1500 wsnode_free (p);
1501 }
1502 p = next;
1503 }
1504}
1505
1506static int
1507wordsplit_varexp (struct wordsplit *wsp)
1508{
1509 struct wordsplit_node *p;
1510
c7b3f021
SP
1511 for (p = wsp->ws_head; p;)
1512 {
1513 struct wordsplit_node *next = p->next;
1514 if (!(p->flags & (_WSNF_NOEXPAND|_WSNF_DELIM)))
1515 if (node_expand (wsp, p, begin_var_p, expvar))
1516 return 1;
1517 p = next;
1518 }
1519
1520 wsnode_nullelim (wsp);
1521 return 0;
1522}
1523\f
/* Predicate for node_expand: return 1 if C, the character following
   a `$', starts a command substitution (an opening parenthesis), and
   0 otherwise. */
static int
begin_cmd_p (int c)
{
  return (c == '(') ? 1 : 0;
}
1529
1530static int
1531expcmd (struct wordsplit *wsp, const char *str, size_t len,
e5474174 1532 struct wordsplit_node **ptail, const char **pend, unsigned flg)
c7b3f021
SP
1533{
1534 int rc;
1535 size_t j;
1536 char *value;
1537 struct wordsplit_node *newnode;
1538
1539 str++;
1540 len--;
1541
1542 if (find_closing_paren (str, 0, len, &j, "()"))
1543 {
1544 _wsplt_seterr (wsp, WRDSE_PAREN);
1545 return 1;
1546 }
1547
1548 *pend = str + j;
1549 if (wsp->ws_options & WRDSO_ARGV)
1550 {
1551 struct wordsplit ws;
1552
1553 rc = _wsplt_subsplit (wsp, &ws, str, j, WRDSF_WS | WRDSF_QUOTE, 1);
1554 if (rc)
1555 {
1556 _wsplt_seterr_sub (wsp, &ws);
1557 wordsplit_free (&ws);
1558 return 1;
1559 }
1560 rc = wsp->ws_command (&value, str, j, ws.ws_wordv, wsp->ws_closure);
1561 wordsplit_free (&ws);
1562 }
1563 else
1564 rc = wsp->ws_command (&value, str, j, NULL, wsp->ws_closure);
1565
1566 if (rc == WRDSE_NOSPACE)
1567 return _wsplt_nomem (wsp);
1568 else if (rc)
1569 {
1570 if (rc == WRDSE_USERERR)
1571 {
1572 if (wsp->ws_errno == WRDSE_USERERR)
1573 free (wsp->ws_usererr);
1574 wsp->ws_usererr = value;
1575 }
1576 _wsplt_seterr (wsp, rc);
1577 return 1;
1578 }
1579
1580 if (value)
1581 {
1582 if (flg & _WSNF_QUOTE)
1583 {
1584 if (wsnode_new (wsp, &newnode))
1585 return 1;
1586 wsnode_insert (wsp, newnode, *ptail, 0);
1587 *ptail = newnode;
1588 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1589 newnode->v.word = value;
1590 }
1591 else if (*value == 0)
1592 {
1593 free (value);
1594 /* Empty string is a special case */
1595 if (wsnode_new (wsp, &newnode))
1596 return 1;
1597 wsnode_insert (wsp, newnode, *ptail, 0);
1598 *ptail = newnode;
1599 newnode->flags = _WSNF_NULL;
1600 }
1601 else
1602 {
1603 struct wordsplit ws;
1604 int rc;
1605
1606 rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
1607 WRDSF_NOVAR | WRDSF_NOCMD
1608 | WRDSF_WS | WRDSF_QUOTE
1609 | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0),
1610 0);
1611 free (value);
1612 if (rc)
1613 {
1614 _wsplt_seterr_sub (wsp, &ws);
1615 wordsplit_free (&ws);
1616 return 1;
1617 }
1618 wsnode_insert (wsp, ws.ws_head, *ptail, 0);
1619 *ptail = ws.ws_tail;
1620 ws.ws_head = ws.ws_tail = NULL;
1621 wordsplit_free (&ws);
1622 }
1623 }
1624 else
1625 {
1626 if (wsnode_new (wsp, &newnode))
1627 return 1;
1628 wsnode_insert (wsp, newnode, *ptail, 0);
1629 *ptail = newnode;
1630 newnode->flags = _WSNF_NULL;
1631 }
1632 return 0;
1633}
1634
1635static int
1636wordsplit_cmdexp (struct wordsplit *wsp)
1637{
1638 struct wordsplit_node *p;
1639
7b5e8039
SP
1640 for (p = wsp->ws_head; p;)
1641 {
1642 struct wordsplit_node *next = p->next;
1643 if (!(p->flags & _WSNF_NOEXPAND))
c7b3f021 1644 if (node_expand (wsp, p, begin_cmd_p, expcmd))
7b5e8039
SP
1645 return 1;
1646 p = next;
1647 }
1648
1649 wsnode_nullelim (wsp);
1650 return 0;
1651}
1652\f
1653/* Strip off any leading and trailing whitespace. This function is called
1654 right after the initial scanning, therefore it assumes that every
1655 node in the list is a text reference node. */
c7b3f021 1656static int
7b5e8039
SP
1657wordsplit_trimws (struct wordsplit *wsp)
1658{
1659 struct wordsplit_node *p;
1660
1661 for (p = wsp->ws_head; p; p = p->next)
1662 {
1663 size_t n;
1664
c7b3f021
SP
1665 if (!(p->flags & _WSNF_QUOTE))
1666 {
1667 /* Skip leading whitespace: */
1668 for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
1669 n++)
1670 ;
1671 p->v.segm.beg = n;
1672 }
1673
1674 while (p->next && (p->flags & _WSNF_JOIN))
1675 p = p->next;
1676
7b5e8039
SP
1677 if (p->flags & _WSNF_QUOTE)
1678 continue;
c7b3f021 1679
7b5e8039
SP
1680 /* Trim trailing whitespace */
1681 for (n = p->v.segm.end;
1682 n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
1683 p->v.segm.end = n;
1684 if (p->v.segm.beg == p->v.segm.end)
1685 p->flags |= _WSNF_NULL;
1686 }
1687
1688 wsnode_nullelim (wsp);
c7b3f021
SP
1689 return 0;
1690}
1691\f
1692static int
1693wordsplit_tildexpand (struct wordsplit *wsp)
1694{
1695 struct wordsplit_node *p;
1696 char *uname = NULL;
1697 size_t usize = 0;
1698
1699 for (p = wsp->ws_head; p; p = p->next)
1700 {
1701 const char *str;
1702
1703 if (p->flags & _WSNF_QUOTE)
1704 continue;
1705
1706 str = wsnode_ptr (wsp, p);
1707 if (str[0] == '~')
1708 {
1709 size_t i, size, dlen;
1710 size_t slen = wsnode_len (p);
1711 struct passwd *pw;
1712 char *newstr;
1713
1714 for (i = 1; i < slen && str[i] != '/'; i++)
1715 ;
1716 if (i == slen)
1717 continue;
1718 if (i > 1)
1719 {
1720 if (i > usize)
1721 {
1722 char *p = realloc (uname, i);
1723 if (!p)
1724 {
1725 free (uname);
1726 return _wsplt_nomem (wsp);
1727 }
1728 uname = p;
1729 usize = i;
1730 }
1731 --i;
1732 memcpy (uname, str + 1, i);
1733 uname[i] = 0;
1734 pw = getpwnam (uname);
1735 }
1736 else
1737 pw = getpwuid (getuid ());
1738
1739 if (!pw)
1740 continue;
1741
1742 dlen = strlen (pw->pw_dir);
1743 size = slen - i + dlen;
1744 newstr = malloc (size);
1745 if (!newstr)
1746 {
1747 free (uname);
1748 return _wsplt_nomem (wsp);
1749 }
1750 --size;
1751
1752 memcpy (newstr, pw->pw_dir, dlen);
1753 memcpy (newstr + dlen, str + i + 1, slen - i - 1);
1754 newstr[size] = 0;
1755 if (p->flags & _WSNF_WORD)
1756 free (p->v.word);
1757 p->v.word = newstr;
1758 p->flags |= _WSNF_WORD;
1759 }
1760 }
1761 free (uname);
1762 return 0;
1763}
1764\f
/* Return 1 if any of the first L characters of S is a globbing
   metacharacter (`*', `?' or `['), and 0 otherwise.  A non-positive L
   yields 0.

   The characters are compared explicitly: strchr would also match the
   terminating null of its pattern string and thus report a null byte
   in S as a metacharacter. */
static int
isglob (const char *s, int l)
{
  while (l-- > 0)
    {
      char c = *s++;

      if (c == '*' || c == '?' || c == '[')
        return 1;
    }
  return 0;
}
1775
1776static int
1777wordsplit_pathexpand (struct wordsplit *wsp)
1778{
1779 struct wordsplit_node *p, *next;
1780 char *pattern = NULL;
1781 size_t patsize = 0;
1782 size_t slen;
1783 int flags = 0;
1784
1785#ifdef GLOB_PERIOD
1786 if (wsp->ws_options & WRDSO_DOTGLOB)
1787 flags = GLOB_PERIOD;
1788#endif
1789
1790 for (p = wsp->ws_head; p; p = next)
1791 {
1792 const char *str;
1793
1794 next = p->next;
1795
1796 if (p->flags & _WSNF_QUOTE)
1797 continue;
1798
1799 str = wsnode_ptr (wsp, p);
1800 slen = wsnode_len (p);
1801
1802 if (isglob (str, slen))
1803 {
1804 int i;
1805 glob_t g;
1806 struct wordsplit_node *prev;
1807
1808 if (slen + 1 > patsize)
1809 {
1810 char *p = realloc (pattern, slen + 1);
1811 if (!p)
1812 return _wsplt_nomem (wsp);
1813 pattern = p;
1814 patsize = slen + 1;
1815 }
1816 memcpy (pattern, str, slen);
1817 pattern[slen] = 0;
1818
1819 switch (glob (pattern, flags, NULL, &g))
1820 {
1821 case 0:
1822 break;
1823
1824 case GLOB_NOSPACE:
1825 free (pattern);
1826 return _wsplt_nomem (wsp);
1827
1828 case GLOB_NOMATCH:
1829 if (wsp->ws_options & WRDSO_NULLGLOB)
1830 {
1831 wsnode_remove (wsp, p);
1832 wsnode_free (p);
1833 }
1834 else if (wsp->ws_options & WRDSO_FAILGLOB)
1835 {
1836 char buf[128];
1837 if (wsp->ws_errno == WRDSE_USERERR)
1838 free (wsp->ws_usererr);
1839 snprintf (buf, sizeof (buf), _("no files match pattern %s"),
1840 pattern);
1841 free (pattern);
1842 wsp->ws_usererr = strdup (buf);
1843 if (!wsp->ws_usererr)
1844 return _wsplt_nomem (wsp);
1845 else
1846 return _wsplt_seterr (wsp, WRDSE_USERERR);
1847 }
1848 continue;
1849
1850 default:
1851 free (pattern);
1852 return _wsplt_seterr (wsp, WRDSE_GLOBERR);
1853 }
1854
1855 prev = p;
1856 for (i = 0; i < g.gl_pathc; i++)
1857 {
1858 struct wordsplit_node *newnode;
1859 char *newstr;
1860
1861 if (wsnode_new (wsp, &newnode))
1862 return 1;
1863 newstr = strdup (g.gl_pathv[i]);
1864 if (!newstr)
1865 return _wsplt_nomem (wsp);
1866 newnode->v.word = newstr;
1867 newnode->flags |= _WSNF_WORD|_WSNF_QUOTE;
1868 wsnode_insert (wsp, newnode, prev, 0);
1869 prev = newnode;
1870 }
1871 globfree (&g);
1872
1873 wsnode_remove (wsp, p);
1874 wsnode_free (p);
1875 }
1876 }
1877 free (pattern);
1878 return 0;
7b5e8039
SP
1879}
1880\f
/* Skip a sequence of sed-like substitution expressions
   (`sXpatXreplXflags', where X is a punctuation character), separated
   by semicolons.  COMMAND is the input text, I the index at which the
   first expression starts and LEN the length of the input.

   Within an expression a backslash causes the next character to be
   skipped.  After the third delimiter, alphanumeric characters are
   consumed as flags.

   Return the index of the first character that is not part of the
   sequence. */
static int
skip_sed_expr (const char *command, size_t i, size_t len)
{
  for (;;)
    {
      int delim;
      /* Number of the expression part being scanned: 1 - pattern,
         2 - replacement, 3 - flags */
      int part;

      if (command[i] == ';')
        i++;
      if (command[i] != 's' || i + 3 >= len || !ISPUNCT (command[i + 1]))
        break;

      i++;
      delim = command[i];
      part = 1;

      i++;
      while (i < len)
        {
          if (part == 3)
            {
              if (command[i] == delim || !ISALNUM (command[i]))
                break;
            }
          else if (command[i] == '\\')
            i++;
          else if (command[i] == delim)
            part++;
          i++;
        }

      /* Go on only if a complete expression is followed by `;' */
      if (part != 3 || i >= len || command[i] != ';')
        break;
    }
  return i;
}
1913
c7b3f021
SP
1914/* wsp->ws_endp points to a delimiter character. If RETURN_DELIMS
1915 is true, return its value, otherwise return the index past it. */
1916static inline size_t
1917skip_delim_internal (struct wordsplit *wsp, int return_delims)
7b5e8039 1918{
c7b3f021
SP
1919 return return_delims ? wsp->ws_endp : wsp->ws_endp + 1;
1920}
7b5e8039 1921
c7b3f021
SP
/* Return the index at which scanning is to resume after the delimiter
   at wsp->ws_endp.  Whether the delimiter itself is skipped is decided
   by WSP_RETURN_DELIMS. */
static inline size_t
skip_delim (struct wordsplit *wsp)
{
  int keep_delim = WSP_RETURN_DELIMS (wsp);

  return skip_delim_internal (wsp, keep_delim);
}
7b5e8039 1927
c7b3f021
SP
1928static inline size_t
1929skip_delim_real (struct wordsplit *wsp)
1930{
1931 return skip_delim_internal (wsp, wsp->ws_flags & WRDSF_RETURN_DELIMS);
7b5e8039
SP
1932}
1933
1934#define _WRDS_EOF 0
1935#define _WRDS_OK 1
1936#define _WRDS_ERR 2
1937
1938static int
c7b3f021 1939scan_qstring (struct wordsplit *wsp, size_t start, size_t *end)
7b5e8039
SP
1940{
1941 size_t j;
1942 const char *command = wsp->ws_input;
1943 size_t len = wsp->ws_len;
1944 char q = command[start];
1945
1946 for (j = start + 1; j < len && command[j] != q; j++)
1947 if (q == '"' && command[j] == '\\')
1948 j++;
1949 if (j < len && command[j] == q)
1950 {
e5474174 1951 unsigned flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
7b5e8039
SP
1952 if (q == '\'')
1953 flags |= _WSNF_NOEXPAND;
1954 if (wordsplit_add_segm (wsp, start + 1, j, flags))
1955 return _WRDS_ERR;
1956 *end = j;
1957 }
1958 else
1959 {
1960 wsp->ws_endp = start;
c7b3f021 1961 _wsplt_seterr (wsp, WRDSE_QUOTE);
7b5e8039
SP
1962 return _WRDS_ERR;
1963 }
1964 return 0;
1965}
1966
1967static int
c7b3f021 1968scan_word (struct wordsplit *wsp, size_t start, int consume_all)
7b5e8039
SP
1969{
1970 size_t len = wsp->ws_len;
1971 const char *command = wsp->ws_input;
1972 const char *comment = wsp->ws_comment;
1973 int join = 0;
e5474174 1974 unsigned flags = 0;
c7b3f021
SP
1975 struct wordsplit_node *np = wsp->ws_tail;
1976
7b5e8039
SP
1977 size_t i = start;
1978
1979 if (i >= len)
1980 {
1981 wsp->ws_errno = WRDSE_EOF;
1982 return _WRDS_EOF;
1983 }
1984
1985 start = i;
1986
1987 if (wsp->ws_flags & WRDSF_SED_EXPR
1988 && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
1989 {
1990 flags = _WSNF_SEXP;
1991 i = skip_sed_expr (command, i, len);
1992 }
c7b3f021 1993 else if (consume_all || !ISDELIM (wsp, command[i]))
7b5e8039
SP
1994 {
1995 while (i < len)
1996 {
1997 if (comment && strchr (comment, command[i]) != NULL)
1998 {
1999 size_t j;
2000 for (j = i + 1; j < len && command[j] != '\n'; j++)
2001 ;
2002 if (wordsplit_add_segm (wsp, start, i, 0))
2003 return _WRDS_ERR;
2004 wsp->ws_endp = j;
2005 return _WRDS_OK;
2006 }
2007
2008 if (wsp->ws_flags & WRDSF_QUOTE)
2009 {
2010 if (command[i] == '\\')
2011 {
2012 if (++i == len)
2013 break;
2014 i++;
2015 continue;
2016 }
2017
2018 if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
2019 ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
2020 {
2021 if (join && wsp->ws_tail)
2022 wsp->ws_tail->flags |= _WSNF_JOIN;
2023 if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
2024 return _WRDS_ERR;
2025 if (scan_qstring (wsp, i, &i))
2026 return _WRDS_ERR;
2027 start = i + 1;
2028 join = 1;
2029 }
2030 }
2031
c7b3f021
SP
2032 if (command[i] == '$')
2033 {
2034 if (!(wsp->ws_flags & WRDSF_NOVAR)
2035 && command[i+1] == '{'
2036 && find_closing_paren (command, i + 2, len, &i, "{}") == 0)
2037 continue;
2038 if (!(wsp->ws_flags & WRDSF_NOCMD)
2039 && command[i+1] == '('
2040 && find_closing_paren (command, i + 2, len, &i, "()") == 0)
2041 continue;
2042 }
2043
2044 if (!consume_all && ISDELIM (wsp, command[i]))
7b5e8039
SP
2045 break;
2046 else
2047 i++;
2048 }
2049 }
c7b3f021 2050 else if (WSP_RETURN_DELIMS (wsp))
7b5e8039
SP
2051 {
2052 i++;
c7b3f021 2053 flags |= _WSNF_DELIM;
7b5e8039
SP
2054 }
2055 else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
2056 flags |= _WSNF_EMPTYOK;
2057
2058 if (join && i > start && wsp->ws_tail)
2059 wsp->ws_tail->flags |= _WSNF_JOIN;
2060 if (wordsplit_add_segm (wsp, start, i, flags))
2061 return _WRDS_ERR;
2062 wsp->ws_endp = i;
2063 if (wsp->ws_flags & WRDSF_INCREMENTAL)
2064 return _WRDS_EOF;
c7b3f021
SP
2065
2066 if (consume_all)
7b5e8039 2067 {
c7b3f021
SP
2068 if (!np)
2069 np = wsp->ws_head;
2070 while (np)
2071 {
2072 np->flags |= _WSNF_QUOTE;
2073 np = np->next;
2074 }
7b5e8039 2075 }
c7b3f021
SP
2076
2077 return _WRDS_OK;
7b5e8039
SP
2078}
2079
/* Numeric value of the digit character C: its decimal value if it
   satisfies ISDIGIT, the value derived from its upper-case form if it
   satisfies ISXDIGIT, and 255 otherwise.  The argument is evaluated
   more than once. */
#define to_num(c) \
  (ISDIGIT(c) ? (c) - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255))

/* Convert at most CNT leading characters of SRC, taken as digits in
   the given BASE, to a number.  Conversion stops at the first
   character which is not an ASCII digit of that base.
   Store the result in *PVAL and return the number of characters
   consumed (0 if SRC does not begin with a valid digit, in which case
   *PVAL is set to 0). */
static int
xtonum (int *pval, const char *src, int base, int cnt)
{
  int i, val;

  for (i = 0, val = 0; i < cnt; i++, src++)
    {
      int n = *(unsigned char *) src;
      if (n > 127 || (n = to_num (n)) >= base)
        break;
      val = val * base + n;
    }
  *pval = val;
  return i;
}
2098
/* Return the number of characters STR will occupy after being quoted
   by wordsplit_c_quote_copy with the same QUOTE_HEX argument (the
   terminating null is not counted).

   *QUOTE is set to 1 if STR contains a space or a double quote, and to
   0 otherwise.

   Each character contributes:
     1 - a space, or a printable character other than tab and backslash;
     2 - a double quote;
     3 - any other character, if QUOTE_HEX is set;
     2 or 4 - any other character if QUOTE_HEX is not set, depending on
         whether wordsplit_c_quote_char returns non-zero for it. */
size_t
wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
{
  size_t total = 0;
  const char *s;

  *quote = 0;
  for (s = str; *s; s++)
    {
      if (*s == ' ' || *s == '"')
        *quote = 1;

      if (*s == ' ')
        total += 1;
      else if (*s == '"')
        total += 2;
      else if (*s != '\t' && *s != '\\' && ISPRINT (*s))
        total += 1;
      else if (quote_hex)
        total += 3;
      else
        total += wordsplit_c_quote_char (*s) ? 2 : 4;
    }
  return total;
}
2128
c7b3f021
SP
/* TRANSTAB is a string of character pairs; in each pair the first
   character is the one that follows a backslash in an escape sequence
   and the second is the character that sequence stands for.

   Return the translation of C, or 0 if TRANSTAB has no pair whose
   first character is C.  A trailing unpaired character is ignored. */
static int
wsplt_unquote_char (const char *transtab, int c)
{
  const char *pair;

  for (pair = transtab; pair[0] && pair[1]; pair += 2)
    {
      if (pair[0] == c)
        return pair[1];
    }
  return 0;
}
2140
c7b3f021
SP
/* The inverse of wsplt_unquote_char: look C up among the second
   characters of the pairs in TRANSTAB and return the first character
   of the matching pair, or 0 if there is none.  A trailing unpaired
   character is ignored. */
static int
wsplt_quote_char (const char *transtab, int c)
{
  while (transtab[0] && transtab[1])
    {
      if (transtab[1] == c)
        return transtab[0];
      transtab += 2;
    }
  return 0;
}
2151
2152int
2153wordsplit_c_unquote_char (int c)
2154{
2155 return wsplt_unquote_char (wordsplit_c_escape_tab, c);
2156}
2157
2158int
2159wordsplit_c_quote_char (int c)
2160{
2161 return wsplt_quote_char (wordsplit_c_escape_tab, c);
7b5e8039
SP
2162}
2163
2164void
c7b3f021
SP
2165wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
2166 char *dst, const char *src, size_t n)
7b5e8039
SP
2167{
2168 int i = 0;
2169 int c;
2170
c7b3f021 2171 inquote = !!inquote;
7b5e8039
SP
2172 while (i < n)
2173 {
2174 if (src[i] == '\\')
2175 {
2176 ++i;
c7b3f021
SP
2177 if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
2178 && (src[i] == 'x' || src[i] == 'X'))
7b5e8039
SP
2179 {
2180 if (n - i < 2)
2181 {
2182 *dst++ = '\\';
2183 *dst++ = src[i++];
2184 }
2185 else
2186 {
2187 int off = xtonum (&c, src + i + 1,
2188 16, 2);
2189 if (off == 0)
2190 {
2191 *dst++ = '\\';
2192 *dst++ = src[i++];
2193 }
2194 else
2195 {
2196 *dst++ = c;
2197 i += off + 1;
2198 }
2199 }
2200 }
c7b3f021
SP
2201 else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
2202 && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
7b5e8039
SP
2203 {
2204 if (n - i < 1)
2205 {
2206 *dst++ = '\\';
2207 *dst++ = src[i++];
2208 }
2209 else
2210 {
2211 int off = xtonum (&c, src + i, 8, 3);
2212 if (off == 0)
2213 {
2214 *dst++ = '\\';
2215 *dst++ = src[i++];
2216 }
2217 else
2218 {
2219 *dst++ = c;
2220 i += off;
2221 }
2222 }
2223 }
c7b3f021
SP
2224 else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
2225 {
2226 *dst++ = c;
2227 ++i;
2228 }
7b5e8039 2229 else
c7b3f021
SP
2230 {
2231 if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
2232 *dst++ = '\\';
2233 *dst++ = src[i++];
2234 }
7b5e8039
SP
2235 }
2236 else
2237 *dst++ = src[i++];
2238 }
2239 *dst = 0;
2240}
2241
/* Copy SRC to DST, quoting the characters that need it:

     - a double quote is preceded by a backslash;
     - a printable character other than tab and backslash is copied
       verbatim;
     - any other character is written as `%XX' (two upper-case
       hexadecimal digits) if QUOTE_HEX is set; otherwise as a backslash
       followed by the character returned by wordsplit_c_quote_char, or
       by the three-digit octal code if that function returns 0.

   DST is NOT null-terminated by this function. */
void
wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
  char *out = dst;
  const char *in;

  for (in = src; *in; in++)
    {
      char tmp[4];
      int esc;

      if (*in == '"')
        {
          *out++ = '\\';
          *out++ = *in;
          continue;
        }

      if (*in != '\t' && *in != '\\' && ISPRINT (*in))
        {
          *out++ = *in;
          continue;
        }

      if (quote_hex)
        {
          snprintf (tmp, sizeof tmp, "%%%02X", *(unsigned char *) in);
          memcpy (out, tmp, 3);
          out += 3;
          continue;
        }

      esc = wordsplit_c_quote_char (*in);
      *out++ = '\\';
      if (esc)
        *out++ = esc;
      else
        {
          snprintf (tmp, sizeof tmp, "%03o", *(unsigned char *) in);
          memcpy (out, tmp, 3);
          out += 3;
        }
    }
}
2280
c7b3f021
SP
2281
2282/* This structure describes a single expansion phase */
2283struct exptab
2284{
2285 char const *descr; /* Textual description (for debugging) */
2286 int flag; /* WRDSF_ bit that controls this phase */
2287 int opt; /* Entry-specific options (see EXPOPT_ flags below */
2288 int (*expansion) (struct wordsplit *wsp); /* expansion function */
2289};
2290
2291/* The following options control expansions: */
2292/* Normally the exptab entry is run if its flag bit is set in struct
2293 wordsplit. The EXPOPT_NEG option negates this test so that expansion
2294 is performed if its associated flag bit is not set in struct wordsplit. */
2295#define EXPOPT_NEG 0x01
2296/* All bits in flag must be set in order for entry to match */
2297#define EXPORT_ALLOF 0x02
2298/* Coalesce the input list before running the expansion. */
2299#define EXPOPT_COALESCE 0x04
2300
2301static struct exptab exptab[] = {
2302 { N_("WS trimming"), WRDSF_WS, 0,
2303 wordsplit_trimws },
2304 { N_("command substitution"), WRDSF_NOCMD, EXPOPT_NEG|EXPOPT_COALESCE,
2305 wordsplit_cmdexp },
2306 { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
2307 NULL },
2308 { N_("tilde expansion"), WRDSF_PATHEXPAND, 0,
2309 wordsplit_tildexpand },
2310 { N_("variable expansion"), WRDSF_NOVAR, EXPOPT_NEG,
2311 wordsplit_varexp },
2312 { N_("quote removal"), 0, EXPOPT_NEG,
2313 wsnode_quoteremoval },
2314 { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
2315 NULL },
2316 { N_("path expansion"), WRDSF_PATHEXPAND, 0,
2317 wordsplit_pathexpand },
2318 { NULL }
2319};
2320
2321static inline int
2322exptab_matches(struct exptab *p, struct wordsplit *wsp)
2323{
2324 int result;
2325
2326 result = (wsp->ws_flags & p->flag);
2327 if (p->opt & EXPORT_ALLOF)
2328 result = result == p->flag;
2329 if (p->opt & EXPOPT_NEG)
2330 result = !result;
2331
2332 return result;
2333}
2334
7b5e8039
SP
2335static int
2336wordsplit_process_list (struct wordsplit *wsp, size_t start)
2337{
c7b3f021
SP
2338 struct exptab *p;
2339
2340 if (wsp->ws_flags & WRDSF_SHOWDBG)
2341 wsp->ws_debug (_("(%02d) Input:%.*s;"),
2342 wsp->ws_lvl, (int) wsp->ws_len, wsp->ws_input);
2343
2344 if ((wsp->ws_flags & WRDSF_NOSPLIT)
2345 || ((wsp->ws_options & WRDSO_MAXWORDS)
2346 && wsp->ws_wordi + 1 == wsp->ws_maxwords))
2347 {
2348 /* Treat entire input as a single word */
2349 if (scan_word (wsp, start, 1) == _WRDS_ERR)
7b5e8039
SP
2350 return wsp->ws_errno;
2351 }
2352 else
2353 {
2354 int rc;
2355
c7b3f021 2356 while ((rc = scan_word (wsp, start, 0)) == _WRDS_OK)
7b5e8039
SP
2357 start = skip_delim (wsp);
2358 /* Make sure tail element is not joinable */
2359 if (wsp->ws_tail)
2360 wsp->ws_tail->flags &= ~_WSNF_JOIN;
2361 if (rc == _WRDS_ERR)
2362 return wsp->ws_errno;
2363 }
2364
2365 if (wsp->ws_flags & WRDSF_SHOWDBG)
2366 {
c7b3f021 2367 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _("Initial list:"));
7b5e8039
SP
2368 wordsplit_dump_nodes (wsp);
2369 }
2370
c7b3f021 2371 for (p = exptab; p->descr; p++)
7b5e8039 2372 {
c7b3f021 2373 if (exptab_matches(p, wsp))
7b5e8039 2374 {
c7b3f021
SP
2375 if (p->opt & EXPOPT_COALESCE)
2376 {
2377 if (wsnode_coalesce (wsp))
2378 break;
2379 if (wsp->ws_flags & WRDSF_SHOWDBG)
2380 {
2381 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl,
2382 _("Coalesced list:"));
2383 wordsplit_dump_nodes (wsp);
2384 }
2385 }
2386 if (p->expansion)
2387 {
2388 if (p->expansion (wsp))
2389 break;
2390 if (wsp->ws_flags & WRDSF_SHOWDBG)
2391 {
2392 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _(p->descr));
2393 wordsplit_dump_nodes (wsp);
2394 }
2395 }
7b5e8039
SP
2396 }
2397 }
7b5e8039
SP
2398 return wsp->ws_errno;
2399}
2400
c7b3f021
SP
2401static int
2402wordsplit_run (const char *command, size_t length, struct wordsplit *wsp,
e5474174 2403 unsigned flags, int lvl)
7b5e8039
SP
2404{
2405 int rc;
2406 size_t start;
7b5e8039
SP
2407
2408 if (!command)
2409 {
2410 if (!(flags & WRDSF_INCREMENTAL))
c7b3f021
SP
2411 return _wsplt_seterr (wsp, WRDSE_USAGE);
2412
2413 if (wsp->ws_head)
2414 return wordsplit_finish (wsp);
7b5e8039 2415
c7b3f021 2416 start = skip_delim_real (wsp);
7b5e8039 2417 if (wsp->ws_endp == wsp->ws_len)
c7b3f021 2418 return _wsplt_seterr (wsp, WRDSE_NOINPUT);
7b5e8039 2419
7b5e8039
SP
2420 wsp->ws_flags |= WRDSF_REUSE;
2421 wordsplit_init0 (wsp);
2422 }
2423 else
2424 {
7b5e8039 2425 start = 0;
c7b3f021 2426 rc = wordsplit_init (wsp, command, length, flags);
7b5e8039
SP
2427 if (rc)
2428 return rc;
c7b3f021 2429 wsp->ws_lvl = lvl;
7b5e8039
SP
2430 }
2431
7b5e8039 2432 rc = wordsplit_process_list (wsp, start);
7b5e8039 2433 if (rc)
c7b3f021
SP
2434 return rc;
2435 return wordsplit_finish (wsp);
2436}
2437
/* Split the first LENGTH characters of COMMAND into words according
   to FLAGS, storing the result in WSP.  This is wordsplit_run at
   nesting level 0.  Return 0 on success or an error code. */
int
wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
               unsigned flags)
{
  int status = wordsplit_run (command, length, wsp, flags, 0);

  return status;
}
2444
/* Split the null-terminated string COMMAND into words according to
   FLAGS, storing the result in WS.  A NULL COMMAND is passed on to
   wordsplit_len with zero length.  Return 0 on success or an error
   code. */
int
wordsplit (const char *command, struct wordsplit *ws, unsigned flags)
{
  size_t length = 0;

  if (command)
    length = strlen (command);
  return wordsplit_len (command, length, ws, flags);
}
2450
2451void
2452wordsplit_free_words (struct wordsplit *ws)
2453{
2454 size_t i;
2455
2456 for (i = 0; i < ws->ws_wordc; i++)
2457 {
2458 char *p = ws->ws_wordv[ws->ws_offs + i];
2459 if (p)
2460 {
2461 free (p);
2462 ws->ws_wordv[ws->ws_offs + i] = NULL;
2463 }
2464 }
2465 ws->ws_wordc = 0;
2466}
2467
c7b3f021
SP
2468void
2469wordsplit_free_envbuf (struct wordsplit *ws)
2470{
2471 if (ws->ws_flags & WRDSF_NOCMD)
2472 return;
2473 if (ws->ws_envbuf)
2474 {
2475 size_t i;
2476
2477 for (i = 0; ws->ws_envbuf[i]; i++)
2478 free (ws->ws_envbuf[i]);
2479 free (ws->ws_envbuf);
2480 ws->ws_envidx = ws->ws_envsiz = 0;
2481 ws->ws_envbuf = NULL;
2482 }
2483}
2484
2485void
2486wordsplit_clearerr (struct wordsplit *ws)
2487{
2488 if (ws->ws_errno == WRDSE_USERERR)
2489 free (ws->ws_usererr);
2490 ws->ws_usererr = NULL;
2491 ws->ws_errno = WRDSE_OK;
2492}
2493
7b5e8039
SP
2494void
2495wordsplit_free (struct wordsplit *ws)
2496{
c7b3f021 2497 wordsplit_free_nodes (ws);
7b5e8039
SP
2498 wordsplit_free_words (ws);
2499 free (ws->ws_wordv);
2500 ws->ws_wordv = NULL;
c7b3f021 2501 wordsplit_free_envbuf (ws);
7b5e8039
SP
2502}
2503
c7b3f021
SP
2504int
2505wordsplit_get_words (struct wordsplit *ws, size_t *wordc, char ***wordv)
7b5e8039 2506{
c7b3f021
SP
2507 char **p = realloc (ws->ws_wordv,
2508 (ws->ws_wordc + 1) * sizeof (ws->ws_wordv[0]));
2509 if (!p)
2510 return -1;
2511 *wordv = p;
2512 *wordc = ws->ws_wordc;
7b5e8039 2513
c7b3f021
SP
2514 ws->ws_wordv = NULL;
2515 ws->ws_wordc = 0;
2516 ws->ws_wordn = 0;
7b5e8039 2517
c7b3f021 2518 return 0;
7b5e8039
SP
2519}
2520
2521const char *_wordsplit_errstr[] = {
2522 N_("no error"),
2523 N_("missing closing quote"),
2524 N_("memory exhausted"),
7b5e8039
SP
2525 N_("invalid wordsplit usage"),
2526 N_("unbalanced curly brace"),
2527 N_("undefined variable"),
c7b3f021
SP
2528 N_("input exhausted"),
2529 N_("unbalanced parenthesis"),
2530 N_("globbing error")
7b5e8039
SP
2531};
2532int _wordsplit_nerrs =
2533 sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
2534
2535const char *
2536wordsplit_strerror (struct wordsplit *ws)
2537{
c7b3f021
SP
2538 if (ws->ws_errno == WRDSE_USERERR)
2539 return ws->ws_usererr;
7b5e8039
SP
2540 if (ws->ws_errno < _wordsplit_nerrs)
2541 return _wordsplit_errstr[ws->ws_errno];
2542 return N_("unknown error");
2543}
c7b3f021
SP
2544
2545void
2546wordsplit_perror (struct wordsplit *wsp)
2547{
2548 switch (wsp->ws_errno)
2549 {
2550 case WRDSE_QUOTE:
2551 wsp->ws_error (_("missing closing %c (start near #%lu)"),
2552 wsp->ws_input[wsp->ws_endp],
2553 (unsigned long) wsp->ws_endp);
2554 break;
2555
2556 default:
2557 wsp->ws_error ("%s", wordsplit_strerror (wsp));
2558 }
2559}
2560