]> git.ipfire.org Git - thirdparty/tar.git/blame - lib/wordsplit.c
Fix Savane bug #64581
[thirdparty/tar.git] / lib / wordsplit.c
CommitLineData
7b5e8039 1/* wordsplit - a word splitter
c7b3f021 2 Copyright (C) 2009-2018 Sergey Poznyakoff
7b5e8039
SP
3
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
c7b3f021 15 with this program. If not, see <http://www.gnu.org/licenses/>. */
7b5e8039
SP
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <errno.h>
22#include <ctype.h>
23#include <unistd.h>
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <stdarg.h>
c7b3f021
SP
28#include <pwd.h>
29#include <glob.h>
7b5e8039
SP
30
31#if ENABLE_NLS
32# include <gettext.h>
33#else
34# define gettext(msgid) msgid
35#endif
36#define _(msgid) gettext (msgid)
37#define N_(msgid) msgid
38
39#include <wordsplit.h>
40
/* Locale-independent (ASCII) character classification helpers.

   Most of these evaluate their argument more than once, so it must not
   have side effects.  */
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
/* NOTE: strchr also matches the terminating NUL, so ISDELIM (ws, 0) is
   true.  This is left as is, since callers may rely on it.  */
#define ISDELIM(ws,c) \
  (strchr ((ws)->ws_delim, (c)) != NULL)
/* The explicit NUL test is needed because strchr would otherwise match
   the string terminator.  */
#define ISPUNCT(c) \
  ((c) != 0 && strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
/* True for the letters a-f and A-F only; decimal digits are NOT matched
   (combine with ISDIGIT when a full hexadecimal test is needed).  */
#define ISXDIGIT(c) ((c) != 0 && strchr("abcdefABCDEF", (c))!=NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)

/* First and subsequent characters of a variable name.  */
#define ISVARBEG(c) (ISALPHA(c) || (c) == '_')
#define ISVARCHR(c) (ISALNUM(c) || (c) == '_')

/* True if delimiter nodes must be kept in the node list: either the
   caller asked for them or a word limit is in effect.  */
#define WSP_RETURN_DELIMS(wsp) \
 ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))

/* Initial size of the word vector and its growth increment (in slots).  */
#define ALLOC_INIT 128
#define ALLOC_INCR 128
61
62static void
63_wsplt_alloc_die (struct wordsplit *wsp)
64{
c7b3f021 65 wsp->ws_error ("%s", _("memory exhausted"));
7b5e8039
SP
66 abort ();
67}
68
f4e2411b 69static void ATTRIBUTE_FORMAT ((__printf__, 1, 2))
7b5e8039
SP
70_wsplt_error (const char *fmt, ...)
71{
72 va_list ap;
73
74 va_start (ap, fmt);
75 vfprintf (stderr, fmt, ap);
76 va_end (ap);
77 fputc ('\n', stderr);
78}
79
80static void wordsplit_free_nodes (struct wordsplit *);
81
c7b3f021
SP
82static int
83_wsplt_seterr (struct wordsplit *wsp, int ec)
84{
85 wsp->ws_errno = ec;
86 if (wsp->ws_flags & WRDSF_SHOWERR)
87 wordsplit_perror (wsp);
88 return ec;
89}
8378991c 90
7b5e8039
SP
91static int
92_wsplt_nomem (struct wordsplit *wsp)
93{
94 errno = ENOMEM;
95 wsp->ws_errno = WRDSE_NOSPACE;
96 if (wsp->ws_flags & WRDSF_ENOMEMABRT)
97 wsp->ws_alloc_die (wsp);
98 if (wsp->ws_flags & WRDSF_SHOWERR)
99 wordsplit_perror (wsp);
100 if (!(wsp->ws_flags & WRDSF_REUSE))
101 wordsplit_free (wsp);
102 wordsplit_free_nodes (wsp);
103 return wsp->ws_errno;
104}
105
c7b3f021
SP
106static int wordsplit_run (const char *command, size_t length,
107 struct wordsplit *wsp,
e5474174 108 unsigned flags, int lvl);
c7b3f021
SP
109
110static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
e5474174 111 unsigned flags);
c7b3f021
SP
112static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
113static int wordsplit_finish (struct wordsplit *wsp);
114
115static int
116_wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
117 char const *str, int len,
e5474174 118 unsigned flags, int finalize)
c7b3f021
SP
119{
120 int rc;
8378991c 121
c7b3f021
SP
122 wss->ws_delim = wsp->ws_delim;
123 wss->ws_debug = wsp->ws_debug;
124 wss->ws_error = wsp->ws_error;
125 wss->ws_alloc_die = wsp->ws_alloc_die;
126
127 if (!(flags & WRDSF_NOVAR))
128 {
129 wss->ws_env = wsp->ws_env;
130 wss->ws_getvar = wsp->ws_getvar;
131 flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
132 }
133 if (!(flags & WRDSF_NOCMD))
134 {
135 wss->ws_command = wsp->ws_command;
136 }
137
138 if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
139 {
140 wss->ws_closure = wsp->ws_closure;
141 flags |= wsp->ws_flags & WRDSF_CLOSURE;
142 }
143
144 wss->ws_options = wsp->ws_options;
8378991c 145
c7b3f021
SP
146 flags |= WRDSF_DELIM
147 | WRDSF_ALLOC_DIE
148 | WRDSF_ERROR
149 | WRDSF_DEBUG
150 | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
151
152 rc = wordsplit_init (wss, str, len, flags);
153 if (rc)
154 return rc;
155 wss->ws_lvl = wsp->ws_lvl + 1;
156 rc = wordsplit_process_list (wss, 0);
157 if (rc)
158 {
159 wordsplit_free_nodes (wss);
160 return rc;
161 }
162 if (finalize)
163 {
164 rc = wordsplit_finish (wss);
165 wordsplit_free_nodes (wss);
166 }
167 return rc;
168}
169
170static void
171_wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
172{
173 if (wsp->ws_errno == WRDSE_USERERR)
174 free (wsp->ws_usererr);
175 wsp->ws_errno = wss->ws_errno;
176 if (wss->ws_errno == WRDSE_USERERR)
177 {
178 wsp->ws_usererr = wss->ws_usererr;
179 wss->ws_errno = WRDSE_EOF;
180 wss->ws_usererr = NULL;
181 }
182}
183
7b5e8039
SP
184static void
185wordsplit_init0 (struct wordsplit *wsp)
186{
187 if (wsp->ws_flags & WRDSF_REUSE)
188 {
189 if (!(wsp->ws_flags & WRDSF_APPEND))
190 wordsplit_free_words (wsp);
c7b3f021 191 wordsplit_clearerr (wsp);
7b5e8039
SP
192 }
193 else
194 {
195 wsp->ws_wordv = NULL;
196 wsp->ws_wordc = 0;
197 wsp->ws_wordn = 0;
198 }
199
200 wsp->ws_errno = 0;
7b5e8039
SP
201}
202
/* Table installed as both escape tables when WRDSF_CESCAPES is
   requested.  It appears to be laid out as pairs: an escape letter
   followed by the character it stands for -- confirm against the code
   that consumes ws_escape.  */
char wordsplit_c_escape_tab[] =
  "\\\\" "\"\"" "a\a" "b\b" "f\f" "n\n" "r\r" "t\t" "v\v";
8378991c 204
7b5e8039
SP
205static int
206wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
e5474174 207 unsigned flags)
7b5e8039
SP
208{
209 wsp->ws_flags = flags;
210
211 if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
212 wsp->ws_alloc_die = _wsplt_alloc_die;
213 if (!(wsp->ws_flags & WRDSF_ERROR))
214 wsp->ws_error = _wsplt_error;
215
c7b3f021 216 if (!(wsp->ws_flags & WRDSF_NOVAR))
7b5e8039 217 {
c7b3f021
SP
218 /* These will be initialized on first variable assignment */
219 wsp->ws_envidx = wsp->ws_envsiz = 0;
220 wsp->ws_envbuf = NULL;
7b5e8039
SP
221 }
222
223 if (!(wsp->ws_flags & WRDSF_NOCMD))
224 {
c7b3f021
SP
225 if (!wsp->ws_command)
226 {
227 _wsplt_seterr (wsp, WRDSE_USAGE);
228 errno = EINVAL;
229 return wsp->ws_errno;
230 }
7b5e8039
SP
231 }
232
233 if (wsp->ws_flags & WRDSF_SHOWDBG)
234 {
235 if (!(wsp->ws_flags & WRDSF_DEBUG))
236 {
237 if (wsp->ws_flags & WRDSF_ERROR)
238 wsp->ws_debug = wsp->ws_error;
239 else if (wsp->ws_flags & WRDSF_SHOWERR)
240 wsp->ws_debug = _wsplt_error;
241 else
242 wsp->ws_flags &= ~WRDSF_SHOWDBG;
243 }
244 }
245
246 wsp->ws_input = input;
247 wsp->ws_len = len;
248
249 if (!(wsp->ws_flags & WRDSF_DOOFFS))
250 wsp->ws_offs = 0;
251
252 if (!(wsp->ws_flags & WRDSF_DELIM))
253 wsp->ws_delim = " \t\n";
254
255 if (!(wsp->ws_flags & WRDSF_COMMENT))
256 wsp->ws_comment = NULL;
257
258 if (!(wsp->ws_flags & WRDSF_CLOSURE))
259 wsp->ws_closure = NULL;
260
c7b3f021
SP
261 if (!(wsp->ws_flags & WRDSF_OPTIONS))
262 wsp->ws_options = 0;
263
264 if (wsp->ws_flags & WRDSF_ESCAPE)
265 {
266 if (!wsp->ws_escape[WRDSX_WORD])
267 wsp->ws_escape[WRDSX_WORD] = "";
268 if (!wsp->ws_escape[WRDSX_QUOTE])
269 wsp->ws_escape[WRDSX_QUOTE] = "";
270 }
271 else
272 {
273 if (wsp->ws_flags & WRDSF_CESCAPES)
274 {
275 wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
276 wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
8378991c 277 wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
c7b3f021
SP
278 | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
279 }
280 else
281 {
282 wsp->ws_escape[WRDSX_WORD] = "";
283 wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
284 wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
285 }
286 }
8378991c 287
7b5e8039 288 wsp->ws_endp = 0;
c7b3f021 289 wsp->ws_wordi = 0;
7b5e8039 290
c7b3f021
SP
291 if (wsp->ws_flags & WRDSF_REUSE)
292 wordsplit_free_nodes (wsp);
293 wsp->ws_head = wsp->ws_tail = NULL;
8378991c 294
7b5e8039 295 wordsplit_init0 (wsp);
8378991c 296
7b5e8039
SP
297 return 0;
298}
299
300static int
301alloc_space (struct wordsplit *wsp, size_t count)
302{
303 size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
304 char **ptr;
305 size_t newalloc;
306
307 if (wsp->ws_wordv == NULL)
308 {
309 newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
310 ptr = calloc (newalloc, sizeof (ptr[0]));
311 }
312 else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
313 {
314 newalloc = offs + wsp->ws_wordc +
315 (count > ALLOC_INCR ? count : ALLOC_INCR);
316 ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
317 }
318 else
319 return 0;
320
321 if (ptr)
322 {
323 wsp->ws_wordn = newalloc;
324 wsp->ws_wordv = ptr;
325 }
326 else
327 return _wsplt_nomem (wsp);
328 return 0;
329}
330\f
331
/* Node state flags (bit mask stored in wordsplit_node.flags) */
#define _WSNF_NULL     (1 << 0)  /* null node (a noop) */
#define _WSNF_WORD     (1 << 1)  /* node contains word in v.word */
#define _WSNF_QUOTE    (1 << 2)  /* text is quoted */
#define _WSNF_NOEXPAND (1 << 3)  /* text is not subject to expansion */
#define _WSNF_JOIN     (1 << 4)  /* node must be joined with the next node */
#define _WSNF_SEXP     (1 << 5)  /* is a sed expression */
#define _WSNF_DELIM    (1 << 6)  /* node is a delimiter */

/* Pseudo-flag, never stored in a node: tells wordsplit_add_segm to add
   the segment even if it is empty.  */
#define _WSNF_EMPTYOK  (1 << 8)
344
/* Element of the doubly-linked list of text fragments built while
   splitting.  A node either refers to a slice of the input string
   (v.segm) or, when _WSNF_WORD is set in flags, owns a heap-allocated
   string (v.word).  */
struct wordsplit_node
{
  struct wordsplit_node *prev;  /* Preceding node, NULL for the head */
  struct wordsplit_node *next;  /* Following node, NULL for the tail */
  unsigned flags;               /* Bit mask of _WSNF_ values */
  union
  {
    struct
    {
      size_t beg;               /* Offset in ws_input where the word starts */
      size_t end;               /* Offset in ws_input where the word ends */
    } segm;
    char *word;                 /* Word text; valid if _WSNF_WORD is set */
  } v;
};
360
361static const char *
e5474174 362wsnode_flagstr (unsigned flags)
7b5e8039 363{
c7b3f021 364 static char retbuf[7];
7b5e8039
SP
365 char *p = retbuf;
366
367 if (flags & _WSNF_WORD)
368 *p++ = 'w';
369 else if (flags & _WSNF_NULL)
370 *p++ = 'n';
371 else
372 *p++ = '-';
373 if (flags & _WSNF_QUOTE)
374 *p++ = 'q';
375 else
376 *p++ = '-';
377 if (flags & _WSNF_NOEXPAND)
378 *p++ = 'E';
379 else
380 *p++ = '-';
381 if (flags & _WSNF_JOIN)
382 *p++ = 'j';
383 else
384 *p++ = '-';
385 if (flags & _WSNF_SEXP)
386 *p++ = 's';
387 else
388 *p++ = '-';
c7b3f021
SP
389 if (flags & _WSNF_DELIM)
390 *p++ = 'd';
391 else
392 *p++ = '-';
7b5e8039
SP
393 *p = 0;
394 return retbuf;
395}
396
397static const char *
398wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
399{
400 if (p->flags & _WSNF_NULL)
401 return "";
402 else if (p->flags & _WSNF_WORD)
403 return p->v.word;
404 else
405 return wsp->ws_input + p->v.segm.beg;
406}
407
408static size_t
409wsnode_len (struct wordsplit_node *p)
410{
411 if (p->flags & _WSNF_NULL)
412 return 0;
413 else if (p->flags & _WSNF_WORD)
414 return strlen (p->v.word);
415 else
416 return p->v.segm.end - p->v.segm.beg;
417}
418
8378991c
PE
419static struct wordsplit_node *
420wsnode_new (struct wordsplit *wsp)
7b5e8039
SP
421{
422 struct wordsplit_node *node = calloc (1, sizeof (*node));
423 if (!node)
8378991c
PE
424 _wsplt_nomem (wsp);
425 return node;
7b5e8039
SP
426}
427
428static void
429wsnode_free (struct wordsplit_node *p)
430{
431 if (p->flags & _WSNF_WORD)
432 free (p->v.word);
433 free (p);
434}
435
436static void
437wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
438{
439 node->next = NULL;
440 node->prev = wsp->ws_tail;
441 if (wsp->ws_tail)
442 wsp->ws_tail->next = node;
443 else
444 wsp->ws_head = node;
445 wsp->ws_tail = node;
446}
447
448static void
449wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
450{
451 struct wordsplit_node *p;
452
453 p = node->prev;
454 if (p)
455 {
456 p->next = node->next;
457 if (!node->next)
458 p->flags &= ~_WSNF_JOIN;
459 }
460 else
461 wsp->ws_head = node->next;
462
463 p = node->next;
464 if (p)
465 p->prev = node->prev;
466 else
467 wsp->ws_tail = node->prev;
468
469 node->next = node->prev = NULL;
470}
471
c7b3f021
SP
472static struct wordsplit_node *
473wsnode_tail (struct wordsplit_node *p)
474{
51142180 475 while (p && p->next)
c7b3f021
SP
476 p = p->next;
477 return p;
478}
479
7b5e8039
SP
480static void
481wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
482 struct wordsplit_node *anchor, int before)
483{
484 if (!wsp->ws_head)
485 {
486 node->next = node->prev = NULL;
487 wsp->ws_head = wsp->ws_tail = node;
488 }
489 else if (before)
490 {
491 if (anchor->prev)
492 wsnode_insert (wsp, node, anchor->prev, 0);
493 else
494 {
c7b3f021 495 struct wordsplit_node *tail = wsnode_tail (node);
7b5e8039 496 node->prev = NULL;
c7b3f021
SP
497 tail->next = anchor;
498 anchor->prev = tail;
7b5e8039
SP
499 wsp->ws_head = node;
500 }
501 }
502 else
503 {
504 struct wordsplit_node *p;
c7b3f021 505 struct wordsplit_node *tail = wsnode_tail (node);
7b5e8039
SP
506
507 p = anchor->next;
508 if (p)
c7b3f021 509 p->prev = tail;
7b5e8039 510 else
c7b3f021
SP
511 wsp->ws_tail = tail;
512 tail->next = p;
7b5e8039
SP
513 node->prev = anchor;
514 anchor->next = node;
515 }
516}
517
518static int
519wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
520{
7b5e8039
SP
521 if (end == beg && !(flg & _WSNF_EMPTYOK))
522 return 0;
8378991c
PE
523 struct wordsplit_node *node = wsnode_new (wsp);
524 if (!node)
525 return 1;
7b5e8039
SP
526 node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
527 node->v.segm.beg = beg;
528 node->v.segm.end = end;
529 wsnode_append (wsp, node);
530 return 0;
531}
532
533static void
534wordsplit_free_nodes (struct wordsplit *wsp)
535{
536 struct wordsplit_node *p;
537
538 for (p = wsp->ws_head; p;)
539 {
540 struct wordsplit_node *next = p->next;
541 wsnode_free (p);
542 p = next;
543 }
544 wsp->ws_head = wsp->ws_tail = NULL;
545}
546
547static void
548wordsplit_dump_nodes (struct wordsplit *wsp)
549{
550 struct wordsplit_node *p;
551 int n = 0;
552
553 for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
554 {
555 if (p->flags & _WSNF_WORD)
c7b3f021
SP
556 wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
557 wsp->ws_lvl,
7b5e8039
SP
558 n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
559 else
c7b3f021
SP
560 wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
561 wsp->ws_lvl,
7b5e8039
SP
562 n, p, p->flags, wsnode_flagstr (p->flags),
563 (int) (p->v.segm.end - p->v.segm.beg),
564 wsp->ws_input + p->v.segm.beg);
565 }
566}
567
568static int
569coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
570{
571 struct wordsplit_node *p, *end;
572 size_t len = 0;
573 char *buf, *cur;
7b5e8039 574
51142180
SP
575 if (!(node->flags & _WSNF_JOIN))
576 return 0;
577
578 for (p = node; p && (p->flags & _WSNF_JOIN); p = p->next)
7b5e8039
SP
579 {
580 len += wsnode_len (p);
581 }
51142180
SP
582 if (p)
583 len += wsnode_len (p);
7b5e8039
SP
584 end = p;
585
586 buf = malloc (len + 1);
587 if (!buf)
588 return _wsplt_nomem (wsp);
589 cur = buf;
590
591 p = node;
e5aac38c 592 for (;;)
7b5e8039
SP
593 {
594 struct wordsplit_node *next = p->next;
595 const char *str = wsnode_ptr (wsp, p);
596 size_t slen = wsnode_len (p);
597
598 memcpy (cur, str, slen);
599 cur += slen;
600 if (p != node)
601 {
c7b3f021 602 node->flags |= p->flags & _WSNF_QUOTE;
7b5e8039 603 wsnode_remove (wsp, p);
e5aac38c
PE
604 if (p == end)
605 {
606 /* Call wsnode_free separately to work around GCC bug 106427. */
607 wsnode_free (p);
608 break;
609 }
7b5e8039
SP
610 wsnode_free (p);
611 }
612 p = next;
613 }
614
615 *cur = 0;
616
617 node->flags &= ~_WSNF_JOIN;
618
619 if (node->flags & _WSNF_WORD)
620 free (node->v.word);
621 else
622 node->flags |= _WSNF_WORD;
623 node->v.word = buf;
624 return 0;
625}
626
c7b3f021
SP
627static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
628 char *dst, const char *src,
629 size_t n);
630
7b5e8039
SP
631static int
632wsnode_quoteremoval (struct wordsplit *wsp)
633{
634 struct wordsplit_node *p;
7b5e8039
SP
635
636 for (p = wsp->ws_head; p; p = p->next)
637 {
638 const char *str = wsnode_ptr (wsp, p);
639 size_t slen = wsnode_len (p);
640 int unquote;
641
642 if (wsp->ws_flags & WRDSF_QUOTE)
c7b3f021 643 unquote = !(p->flags & _WSNF_NOEXPAND);
7b5e8039
SP
644 else
645 unquote = 0;
646
647 if (unquote)
648 {
649 if (!(p->flags & _WSNF_WORD))
650 {
651 char *newstr = malloc (slen + 1);
652 if (!newstr)
653 return _wsplt_nomem (wsp);
654 memcpy (newstr, str, slen);
655 newstr[slen] = 0;
656 p->v.word = newstr;
657 p->flags |= _WSNF_WORD;
658 }
659
c7b3f021
SP
660 wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
661 p->v.word, str, slen);
7b5e8039
SP
662 }
663 }
664 return 0;
665}
666
667static int
668wsnode_coalesce (struct wordsplit *wsp)
669{
670 struct wordsplit_node *p;
671
672 for (p = wsp->ws_head; p; p = p->next)
673 {
674 if (p->flags & _WSNF_JOIN)
675 if (coalesce_segment (wsp, p))
676 return 1;
677 }
678 return 0;
679}
680
c7b3f021
SP
681static int
682wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
683{
684 if (p->next)
685 {
686 struct wordsplit_node *np = p;
687 while (np && np->next)
688 {
689 np->flags |= _WSNF_JOIN;
690 np = np->next;
691 }
692 if (coalesce_segment (wsp, p))
693 return 1;
694 }
695 return 0;
696}
697
698static size_t skip_delim (struct wordsplit *wsp);
699
7b5e8039
SP
700static int
701wordsplit_finish (struct wordsplit *wsp)
702{
703 struct wordsplit_node *p;
704 size_t n;
c7b3f021 705 int delim;
7b5e8039 706
c7b3f021
SP
707 /* Postprocess delimiters. It would be rather simple, if it weren't for
708 the incremental operation.
7b5e8039 709
c7b3f021
SP
710 Nodes of type _WSNF_DELIM get inserted to the node list if either
711 WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
8378991c 712
c7b3f021
SP
713 The following cases should be distinguished:
714
715 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
716 any runs of similar delimiter nodes to a single node. The nodes are
717 'similar' if they point to the same delimiter character.
718
719 If WRDSO_MAXWORDS option is set, stop compressing when
720 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
721 a single last node.
722
723 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
724 remove any delimiter nodes. Stop operation when
725 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
726 a single last node.
727
728 3. If incremental operation is in progress, restart the loop any time
729 a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
730 is set.
731 */
732 again:
733 delim = 0; /* Delimiter being processed (if any) */
734 n = 0; /* Number of words processed so far */
735 p = wsp->ws_head; /* Current node */
736
737 while (p)
738 {
739 struct wordsplit_node *next = p->next;
740 if (p->flags & _WSNF_DELIM)
741 {
742 if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
743 {
744 if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
745 {
746 char const *s = wsnode_ptr (wsp, p);
747 if (delim)
748 {
749 if (delim == *s)
750 {
751 wsnode_remove (wsp, p);
752 p = next;
753 continue;
754 }
755 else
756 {
757 delim = 0;
758 n++; /* Count this node; it will be returned */
759 }
760 }
761 else
762 {
763 delim = *s;
764 p = next;
765 continue;
766 }
767 }
768 }
769 else if (wsp->ws_options & WRDSO_MAXWORDS)
770 {
771 wsnode_remove (wsp, p);
772 p = next;
773 continue;
774 }
775 }
8378991c 776 else
c7b3f021
SP
777 {
778 if (delim)
779 {
780 /* Last node was a delimiter or a compressed run of delimiters;
781 Count it, and clear the delimiter marker */
782 n++;
783 delim = 0;
784 }
785 if (wsp->ws_options & WRDSO_MAXWORDS)
786 {
787 if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
788 break;
789 }
790 }
791 n++;
792 if (wsp->ws_flags & WRDSF_INCREMENTAL)
793 p = NULL; /* Break the loop */
794 else
795 p = next;
796 }
797
798 if (p)
799 {
800 /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
801 words have already been collected. Reconstruct a single final
802 node from the remaining nodes. */
803 if (wsnode_tail_coalesce (wsp, p))
804 return wsp->ws_errno;
805 n++;
806 }
807
808 if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL))
809 {
810 /* The loop above have eliminated all nodes. Restart the
811 processing, if there's any input left. */
812 if (wsp->ws_endp < wsp->ws_len)
813 {
814 int rc;
815 if (wsp->ws_flags & WRDSF_SHOWDBG)
816 wsp->ws_debug (_("Restarting"));
817 rc = wordsplit_process_list (wsp, skip_delim (wsp));
818 if (rc)
819 return rc;
820 }
821 else
822 {
823 wsp->ws_error = WRDSE_EOF;
824 return WRDSE_EOF;
825 }
826 goto again;
827 }
7b5e8039
SP
828
829 if (alloc_space (wsp, n + 1))
c7b3f021 830 return wsp->ws_errno;
7b5e8039 831
c7b3f021 832 while (wsp->ws_head)
7b5e8039 833 {
c7b3f021
SP
834 const char *str = wsnode_ptr (wsp, wsp->ws_head);
835 size_t slen = wsnode_len (wsp->ws_head);
7b5e8039
SP
836 char *newstr = malloc (slen + 1);
837
838 /* Assign newstr first, even if it is NULL. This way
839 wordsplit_free will work even if we return
840 nomem later. */
841 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
842 if (!newstr)
843 return _wsplt_nomem (wsp);
844 memcpy (newstr, str, slen);
845 newstr[slen] = 0;
846
c7b3f021
SP
847 wsnode_remove (wsp, wsp->ws_head);
848
7b5e8039 849 wsp->ws_wordc++;
c7b3f021 850 wsp->ws_wordi++;
7b5e8039 851
c7b3f021
SP
852 if (wsp->ws_flags & WRDSF_INCREMENTAL)
853 break;
7b5e8039
SP
854 }
855 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
856 return 0;
857}
858\f
c7b3f021
SP
859int
860wordsplit_append (wordsplit_t *wsp, int argc, char **argv)
861{
862 int rc;
863 size_t i;
7b5e8039 864
c7b3f021
SP
865 rc = alloc_space (wsp, wsp->ws_wordc + argc + 1);
866 if (rc)
867 return rc;
868 for (i = 0; i < argc; i++)
869 {
870 char *newstr = strdup (argv[i]);
871 if (!newstr)
872 {
873 while (i > 0)
874 {
875 free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1]);
876 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1] = NULL;
877 i--;
878 }
879 return _wsplt_nomem (wsp);
880 }
881 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i] = newstr;
882 }
883 wsp->ws_wordc += i;
884 wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
885 return 0;
886}
887\f
7b5e8039
SP
888/* Variable expansion */
889static int
890node_split_prefix (struct wordsplit *wsp,
891 struct wordsplit_node **ptail,
892 struct wordsplit_node *node,
893 size_t beg, size_t len, int flg)
894{
7b5e8039
SP
895
896 if (len == 0)
897 return 0;
8378991c
PE
898 struct wordsplit_node *newnode = wsnode_new (wsp);
899 if (!newnode)
7b5e8039
SP
900 return 1;
901 wsnode_insert (wsp, newnode, *ptail, 0);
902 if (node->flags & _WSNF_WORD)
903 {
904 const char *str = wsnode_ptr (wsp, node);
905 char *newstr = malloc (len + 1);
906 if (!newstr)
907 return _wsplt_nomem (wsp);
908 memcpy (newstr, str + beg, len);
909 newstr[len] = 0;
910 newnode->flags = _WSNF_WORD;
911 newnode->v.word = newstr;
912 }
913 else
914 {
915 newnode->v.segm.beg = node->v.segm.beg + beg;
916 newnode->v.segm.end = newnode->v.segm.beg + len;
917 }
918 newnode->flags |= flg;
919 *ptail = newnode;
920 return 0;
921}
922
/* Scan STR from offset I up to (not including) LEN for the bracket that
   closes an already open one.  PAREN[0] is the opening and PAREN[1] the
   closing character.  Nested brackets are balanced; brackets inside
   single or double quotes are ignored, and inside double quotes a
   backslash hides the character after it.

   On success stores the offset of the closing bracket in *POFF and
   returns 0.  Returns 1 if no closing bracket is found.  */
static int
find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
                    char const *paren)
{
  enum { st_init, st_squote, st_dquote } state = st_init;
  size_t depth = 1;

  for (; i < len; i++)
    {
      char c = str[i];

      if (state == st_squote)
        {
          if (c == '\'')
            state = st_init;
        }
      else if (state == st_dquote)
        {
          if (c == '\\')
            i++;                /* skip the escaped character */
          else if (c == '"')
            state = st_init;
        }
      else if (c == '"')
        state = st_dquote;
      else if (c == '\'')
        state = st_squote;
      else if (c == paren[0])
        depth++;
      else if (c == paren[1])
        {
          if (--depth == 0)
            {
              *poff = i;
              return 0;
            }
        }
    }

  return 1;
}
979
c7b3f021
SP
980static int
981wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len,
982 char const **ret)
7b5e8039
SP
983{
984 size_t i;
985
986 if (!(wsp->ws_flags & WRDSF_ENV))
c7b3f021 987 return WRDSE_UNDEF;
7b5e8039
SP
988
989 if (wsp->ws_flags & WRDSF_ENV_KV)
990 {
991 /* A key-value pair environment */
992 for (i = 0; wsp->ws_env[i]; i++)
993 {
994 size_t elen = strlen (wsp->ws_env[i]);
995 if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
c7b3f021
SP
996 {
997 *ret = wsp->ws_env[i + 1];
998 return WRDSE_OK;
999 }
7b5e8039
SP
1000 /* Skip the value. Break the loop if it is NULL. */
1001 i++;
1002 if (wsp->ws_env[i] == NULL)
1003 break;
1004 }
1005 }
c7b3f021 1006 else if (wsp->ws_env)
7b5e8039
SP
1007 {
1008 /* Usual (A=B) environment. */
1009 for (i = 0; wsp->ws_env[i]; i++)
1010 {
1011 size_t j;
1012 const char *var = wsp->ws_env[i];
1013
1014 for (j = 0; j < len; j++)
1015 if (name[j] != var[j])
1016 break;
1017 if (j == len && var[j] == '=')
c7b3f021
SP
1018 {
1019 *ret = var + j + 1;
1020 return WRDSE_OK;
1021 }
1022 }
1023 }
1024 return WRDSE_UNDEF;
1025}
1026
1027static int
1028wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
1029 char *value)
1030{
1031 int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
1032 char *v;
8378991c 1033
c7b3f021
SP
1034 if (wsp->ws_envidx + n >= wsp->ws_envsiz)
1035 {
1036 size_t sz;
1037 char **newenv;
1038
1039 if (!wsp->ws_envbuf)
1040 {
1041 if (wsp->ws_flags & WRDSF_ENV)
1042 {
1043 size_t i = 0, j;
1044
1045 if (wsp->ws_env)
1046 {
1047 for (; wsp->ws_env[i]; i++)
1048 ;
1049 }
8378991c 1050
c7b3f021
SP
1051 sz = i + n + 1;
1052
1053 newenv = calloc (sz, sizeof(newenv[0]));
1054 if (!newenv)
1055 return _wsplt_nomem (wsp);
1056
1057 for (j = 0; j < i; j++)
1058 {
1059 newenv[j] = strdup (wsp->ws_env[j]);
1060 if (!newenv[j])
1061 {
1062 for (; j > 1; j--)
1063 free (newenv[j-1]);
1064 free (newenv[j-1]);
9042dfc4 1065 free (newenv);
c7b3f021
SP
1066 return _wsplt_nomem (wsp);
1067 }
1068 }
1069 newenv[j] = NULL;
8378991c 1070
c7b3f021
SP
1071 wsp->ws_envbuf = newenv;
1072 wsp->ws_envidx = i;
1073 wsp->ws_envsiz = sz;
1074 wsp->ws_env = (const char**) wsp->ws_envbuf;
1075 }
1076 else
1077 {
1078 newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0]));
1079 if (!newenv)
1080 return _wsplt_nomem (wsp);
1081 wsp->ws_envbuf = newenv;
1082 wsp->ws_envidx = 0;
1083 wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
1084 wsp->ws_env = (const char**) wsp->ws_envbuf;
1085 wsp->ws_flags |= WRDSF_ENV;
1086 }
1087 }
1088 else
1089 {
1090 wsp->ws_envsiz *= 2;
1091 newenv = realloc (wsp->ws_envbuf,
1092 wsp->ws_envsiz * sizeof (wsp->ws_envbuf[0]));
1093 if (!newenv)
1094 return _wsplt_nomem (wsp);
1095 wsp->ws_envbuf = newenv;
1096 wsp->ws_env = (const char**) wsp->ws_envbuf;
1097 }
1098 }
8378991c 1099
c7b3f021
SP
1100 if (wsp->ws_flags & WRDSF_ENV_KV)
1101 {
1102 /* A key-value pair environment */
1103 char *p = malloc (namelen + 1);
1104 if (!p)
1105 return _wsplt_nomem (wsp);
1106 memcpy (p, name, namelen);
1107 p[namelen] = 0;
1108
1109 v = strdup (value);
1110 if (!v)
1111 {
1112 free (p);
1113 return _wsplt_nomem (wsp);
7b5e8039 1114 }
c7b3f021
SP
1115 wsp->ws_env[wsp->ws_envidx++] = p;
1116 wsp->ws_env[wsp->ws_envidx++] = v;
7b5e8039 1117 }
c7b3f021
SP
1118 else
1119 {
1120 v = malloc (namelen + strlen(value) + 2);
1121 if (!v)
1122 return _wsplt_nomem (wsp);
1123 memcpy (v, name, namelen);
1124 v[namelen++] = '=';
1125 strcpy(v + namelen, value);
1126 wsp->ws_env[wsp->ws_envidx++] = v;
1127 }
1128 wsp->ws_env[wsp->ws_envidx++] = NULL;
1129 return WRDSE_OK;
7b5e8039
SP
1130}
1131
1132static int
1133expvar (struct wordsplit *wsp, const char *str, size_t len,
e5474174 1134 struct wordsplit_node **ptail, const char **pend, unsigned flg)
7b5e8039
SP
1135{
1136 size_t i = 0;
1137 const char *defstr = NULL;
c7b3f021 1138 char *value;
7b5e8039
SP
1139 const char *vptr;
1140 struct wordsplit_node *newnode;
1141 const char *start = str - 1;
c7b3f021
SP
1142 int rc;
1143 struct wordsplit ws;
8378991c 1144
c7b3f021 1145 if (ISVARBEG (str[0]))
7b5e8039
SP
1146 {
1147 for (i = 1; i < len; i++)
c7b3f021 1148 if (!ISVARCHR (str[i]))
7b5e8039
SP
1149 break;
1150 *pend = str + i - 1;
1151 }
1152 else if (str[0] == '{')
1153 {
1154 str++;
1155 len--;
1156 for (i = 1; i < len; i++)
7b5e8039 1157 {
c7b3f021 1158 if (str[i] == ':')
7b5e8039 1159 {
c7b3f021 1160 size_t j;
8378991c 1161
c7b3f021
SP
1162 defstr = str + i + 1;
1163 if (find_closing_paren (str, i + 1, len, &j, "{}"))
1164 return _wsplt_seterr (wsp, WRDSE_CBRACE);
1165 *pend = str + j;
1166 break;
1167 }
1168 else if (str[i] == '}')
1169 {
1170 defstr = NULL;
1171 *pend = str + i;
1172 break;
1173 }
1174 else if (strchr ("-+?=", str[i]))
1175 {
1176 size_t j;
8378991c 1177
c7b3f021
SP
1178 defstr = str + i;
1179 if (find_closing_paren (str, i, len, &j, "{}"))
1180 return _wsplt_seterr (wsp, WRDSE_CBRACE);
1181 *pend = str + j;
1182 break;
7b5e8039 1183 }
7b5e8039 1184 }
c7b3f021
SP
1185 if (i == len)
1186 return _wsplt_seterr (wsp, WRDSE_CBRACE);
7b5e8039
SP
1187 }
1188 else
1189 {
8378991c
PE
1190 newnode = wsnode_new (wsp);
1191 if (!newnode)
7b5e8039
SP
1192 return 1;
1193 wsnode_insert (wsp, newnode, *ptail, 0);
1194 *ptail = newnode;
1195 newnode->flags = _WSNF_WORD | flg;
1196 newnode->v.word = malloc (3);
1197 if (!newnode->v.word)
1198 return _wsplt_nomem (wsp);
1199 newnode->v.word[0] = '$';
1200 newnode->v.word[1] = str[0];
1201 newnode->v.word[2] = 0;
1202 *pend = str;
1203 return 0;
1204 }
1205
1206 /* Actually expand the variable */
1207 /* str - start of the variable name
1208 i - its length
1209 defstr - default replacement str */
1210
c7b3f021 1211 if (defstr && strchr("-+?=", defstr[0]) == 0)
7b5e8039 1212 {
c7b3f021
SP
1213 rc = WRDSE_UNDEF;
1214 defstr = NULL;
7b5e8039
SP
1215 }
1216 else
1217 {
c7b3f021
SP
1218 rc = wordsplit_find_env (wsp, str, i, &vptr);
1219 if (rc == WRDSE_OK)
1220 {
1221 if (vptr)
1222 {
1223 value = strdup (vptr);
1224 if (!value)
1225 rc = WRDSE_NOSPACE;
1226 }
1227 else
1228 rc = WRDSE_UNDEF;
1229 }
1230 else if (wsp->ws_flags & WRDSF_GETVAR)
1231 rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
7b5e8039 1232 else
c7b3f021 1233 rc = WRDSE_UNDEF;
17f99bc6 1234
c7b3f021
SP
1235 if (rc == WRDSE_OK
1236 && (!value || value[0] == 0)
1237 && defstr && defstr[-1] == ':')
1238 {
1239 free (value);
1240 rc = WRDSE_UNDEF;
1241 }
1242 }
8378991c 1243
c7b3f021 1244 switch (rc)
7b5e8039 1245 {
c7b3f021
SP
1246 case WRDSE_OK:
1247 if (defstr && *defstr == '+')
7b5e8039 1248 {
c7b3f021
SP
1249 size_t size = *pend - ++defstr;
1250
1251 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1252 WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
1253 (wsp->ws_flags &
1254 (WRDSF_NOVAR | WRDSF_NOCMD)), 1);
1255 if (rc)
1256 return rc;
1257 free (value);
1258 value = ws.ws_wordv[0];
1259 ws.ws_wordv[0] = NULL;
1260 wordsplit_free (&ws);
7b5e8039 1261 }
c7b3f021 1262 break;
8378991c 1263
c7b3f021
SP
1264 case WRDSE_UNDEF:
1265 if (defstr)
7b5e8039 1266 {
c7b3f021
SP
1267 size_t size;
1268 if (*defstr == '-' || *defstr == '=')
1269 {
1270 size = *pend - ++defstr;
1271
1272 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1273 WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
1274 (wsp->ws_flags &
1275 (WRDSF_NOVAR | WRDSF_NOCMD)),
1276 1);
1277 if (rc)
1278 return rc;
1279
1280 value = ws.ws_wordv[0];
1281 ws.ws_wordv[0] = NULL;
1282 wordsplit_free (&ws);
8378991c 1283
c7b3f021
SP
1284 if (defstr[-1] == '=')
1285 wsplt_assign_var (wsp, str, i, value);
1286 }
8378991c 1287 else
c7b3f021
SP
1288 {
1289 if (*defstr == '?')
1290 {
1291 size = *pend - ++defstr;
1292 if (size == 0)
1293 wsp->ws_error (_("%.*s: variable null or not set"),
1294 (int) i, str);
1295 else
1296 {
1297 rc = _wsplt_subsplit (wsp, &ws, defstr, size,
1298 WRDSF_NOSPLIT | WRDSF_WS |
1299 WRDSF_QUOTE |
1300 (wsp->ws_flags &
1301 (WRDSF_NOVAR | WRDSF_NOCMD)),
1302 1);
1303 if (rc == 0)
1304 wsp->ws_error ("%.*s: %s",
1305 (int) i, str, ws.ws_wordv[0]);
1306 else
1307 wsp->ws_error ("%.*s: %.*s",
1308 (int) i, str, (int) size, defstr);
1309 wordsplit_free (&ws);
1310 }
1311 }
1312 value = NULL;
1313 }
1314 }
1315 else if (wsp->ws_flags & WRDSF_UNDEF)
1316 {
1317 _wsplt_seterr (wsp, WRDSE_UNDEF);
1318 return 1;
1319 }
1320 else
1321 {
1322 if (wsp->ws_flags & WRDSF_WARNUNDEF)
1323 wsp->ws_error (_("warning: undefined variable `%.*s'"),
1324 (int) i, str);
1325 if (wsp->ws_flags & WRDSF_KEEPUNDEF)
1326 value = NULL;
1327 else
1328 {
1329 value = strdup ("");
1330 if (!value)
1331 return _wsplt_nomem (wsp);
1332 }
1333 }
1334 break;
8378991c 1335
c7b3f021
SP
1336 case WRDSE_NOSPACE:
1337 return _wsplt_nomem (wsp);
1338
1339 case WRDSE_USERERR:
1340 if (wsp->ws_errno == WRDSE_USERERR)
1341 free (wsp->ws_usererr);
1342 wsp->ws_usererr = value;
e5474174 1343 FALLTHROUGH;
c7b3f021
SP
1344 default:
1345 _wsplt_seterr (wsp, rc);
1346 return 1;
1347 }
1348
1349 if (value)
1350 {
1351 if (flg & _WSNF_QUOTE)
1352 {
8378991c
PE
1353 newnode = wsnode_new (wsp);
1354 if (!newnode)
9042dfc4
SP
1355 {
1356 free (value);
1357 return 1;
1358 }
c7b3f021
SP
1359 wsnode_insert (wsp, newnode, *ptail, 0);
1360 *ptail = newnode;
1361 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1362 newnode->v.word = value;
1363 }
1364 else if (*value == 0)
1365 {
1366 free (value);
1367 /* Empty string is a special case */
8378991c
PE
1368 newnode = wsnode_new (wsp);
1369 if (!newnode)
c7b3f021 1370 return 1;
7b5e8039
SP
1371 wsnode_insert (wsp, newnode, *ptail, 0);
1372 *ptail = newnode;
1373 newnode->flags = _WSNF_NULL;
1374 }
1375 else
1376 {
1377 struct wordsplit ws;
c7b3f021 1378 int rc;
8378991c 1379
c7b3f021
SP
1380 rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
1381 WRDSF_NOVAR | WRDSF_NOCMD |
1382 WRDSF_QUOTE
1383 | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) ,
1384 0);
1385 free (value);
1386 if (rc)
7b5e8039 1387 {
c7b3f021 1388 _wsplt_seterr_sub (wsp, &ws);
7b5e8039
SP
1389 wordsplit_free (&ws);
1390 return 1;
1391 }
c7b3f021
SP
1392 wsnode_insert (wsp, ws.ws_head, *ptail, 0);
1393 *ptail = ws.ws_tail;
1394 ws.ws_head = ws.ws_tail = NULL;
7b5e8039
SP
1395 wordsplit_free (&ws);
1396 }
1397 }
1398 else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
1399 {
1400 size_t size = *pend - start + 1;
1401
8378991c
PE
1402 newnode = wsnode_new (wsp);
1403 if (!newnode)
7b5e8039
SP
1404 return 1;
1405 wsnode_insert (wsp, newnode, *ptail, 0);
1406 *ptail = newnode;
1407 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1408 newnode->v.word = malloc (size + 1);
1409 if (!newnode->v.word)
1410 return _wsplt_nomem (wsp);
1411 memcpy (newnode->v.word, start, size);
1412 newnode->v.word[size] = 0;
1413 }
1414 else
1415 {
8378991c
PE
1416 newnode = wsnode_new (wsp);
1417 if (!newnode)
7b5e8039
SP
1418 return 1;
1419 wsnode_insert (wsp, newnode, *ptail, 0);
1420 *ptail = newnode;
1421 newnode->flags = _WSNF_NULL;
1422 }
1423 return 0;
1424}
1425
/* Return 1 if C, the character following a `$', can start a variable
   reference: either an opening curly brace or a character for which
   ISVARBEG is true.  Return 0 otherwise. */
static int
begin_var_p (int c)
{
  if (c == '{')
    return 1;
  return (ISVARBEG (c)) ? 1 : 0;
}
1431
1432static int
1433node_expand (struct wordsplit *wsp, struct wordsplit_node *node,
1434 int (*beg_p) (int),
1435 int (*ws_exp_fn) (struct wordsplit *wsp,
1436 const char *str, size_t len,
1437 struct wordsplit_node **ptail,
1438 const char **pend,
e5474174 1439 unsigned flg))
7b5e8039
SP
1440{
1441 const char *str = wsnode_ptr (wsp, node);
1442 size_t slen = wsnode_len (node);
1443 const char *end = str + slen;
1444 const char *p;
1445 size_t off = 0;
1446 struct wordsplit_node *tail = node;
1447
1448 for (p = str; p < end; p++)
1449 {
1450 if (*p == '\\')
1451 {
1452 p++;
1453 continue;
1454 }
c7b3f021 1455 if (*p == '$' && beg_p (p[1]))
7b5e8039
SP
1456 {
1457 size_t n = p - str;
1458
1459 if (tail != node)
1460 tail->flags |= _WSNF_JOIN;
1461 if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
1462 return 1;
1463 p++;
c7b3f021
SP
1464 if (ws_exp_fn (wsp, p, slen - n, &tail, &p,
1465 node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
7b5e8039
SP
1466 return 1;
1467 off += p - str + 1;
1468 str = p + 1;
1469 }
1470 }
1471 if (p > str)
1472 {
1473 if (tail != node)
1474 tail->flags |= _WSNF_JOIN;
1475 if (node_split_prefix (wsp, &tail, node, off, p - str,
c7b3f021 1476 node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
7b5e8039
SP
1477 return 1;
1478 }
1479 if (tail != node)
1480 {
1481 wsnode_remove (wsp, node);
1482 wsnode_free (node);
1483 }
1484 return 0;
1485}
8378991c 1486
c7b3f021 1487/* Remove NULL nodes from the list */
7b5e8039
SP
1488static void
1489wsnode_nullelim (struct wordsplit *wsp)
1490{
1491 struct wordsplit_node *p;
1492
1493 for (p = wsp->ws_head; p;)
1494 {
1495 struct wordsplit_node *next = p->next;
c7b3f021
SP
1496 if (p->flags & _WSNF_DELIM && p->prev)
1497 p->prev->flags &= ~_WSNF_JOIN;
7b5e8039
SP
1498 if (p->flags & _WSNF_NULL)
1499 {
1500 wsnode_remove (wsp, p);
1501 wsnode_free (p);
1502 }
1503 p = next;
1504 }
1505}
1506
1507static int
1508wordsplit_varexp (struct wordsplit *wsp)
1509{
1510 struct wordsplit_node *p;
1511
c7b3f021
SP
1512 for (p = wsp->ws_head; p;)
1513 {
1514 struct wordsplit_node *next = p->next;
1515 if (!(p->flags & (_WSNF_NOEXPAND|_WSNF_DELIM)))
1516 if (node_expand (wsp, p, begin_var_p, expvar))
1517 return 1;
1518 p = next;
1519 }
1520
1521 wsnode_nullelim (wsp);
1522 return 0;
1523}
1524\f
/* Return 1 if C, the character following a `$', starts a command
   substitution (i.e. it is an opening parenthesis), 0 otherwise. */
static int
begin_cmd_p (int c)
{
  return ('(' == c) ? 1 : 0;
}
1530
1531static int
1532expcmd (struct wordsplit *wsp, const char *str, size_t len,
e5474174 1533 struct wordsplit_node **ptail, const char **pend, unsigned flg)
c7b3f021
SP
1534{
1535 int rc;
1536 size_t j;
1537 char *value;
1538 struct wordsplit_node *newnode;
8378991c 1539
c7b3f021
SP
1540 str++;
1541 len--;
1542
1543 if (find_closing_paren (str, 0, len, &j, "()"))
1544 {
1545 _wsplt_seterr (wsp, WRDSE_PAREN);
1546 return 1;
1547 }
1548
1549 *pend = str + j;
1550 if (wsp->ws_options & WRDSO_ARGV)
1551 {
1552 struct wordsplit ws;
1553
1554 rc = _wsplt_subsplit (wsp, &ws, str, j, WRDSF_WS | WRDSF_QUOTE, 1);
1555 if (rc)
1556 {
1557 _wsplt_seterr_sub (wsp, &ws);
1558 wordsplit_free (&ws);
1559 return 1;
1560 }
1561 rc = wsp->ws_command (&value, str, j, ws.ws_wordv, wsp->ws_closure);
1562 wordsplit_free (&ws);
1563 }
1564 else
1565 rc = wsp->ws_command (&value, str, j, NULL, wsp->ws_closure);
8378991c 1566
c7b3f021
SP
1567 if (rc == WRDSE_NOSPACE)
1568 return _wsplt_nomem (wsp);
1569 else if (rc)
1570 {
1571 if (rc == WRDSE_USERERR)
1572 {
1573 if (wsp->ws_errno == WRDSE_USERERR)
1574 free (wsp->ws_usererr);
1575 wsp->ws_usererr = value;
1576 }
1577 _wsplt_seterr (wsp, rc);
1578 return 1;
1579 }
1580
1581 if (value)
1582 {
1583 if (flg & _WSNF_QUOTE)
1584 {
8378991c
PE
1585 newnode = wsnode_new (wsp);
1586 if (!newnode)
c7b3f021
SP
1587 return 1;
1588 wsnode_insert (wsp, newnode, *ptail, 0);
1589 *ptail = newnode;
1590 newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
1591 newnode->v.word = value;
1592 }
1593 else if (*value == 0)
1594 {
1595 free (value);
1596 /* Empty string is a special case */
8378991c
PE
1597 newnode = wsnode_new (wsp);
1598 if (!newnode)
c7b3f021
SP
1599 return 1;
1600 wsnode_insert (wsp, newnode, *ptail, 0);
1601 *ptail = newnode;
1602 newnode->flags = _WSNF_NULL;
1603 }
1604 else
1605 {
1606 struct wordsplit ws;
1607 int rc;
1608
1609 rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
1610 WRDSF_NOVAR | WRDSF_NOCMD
1611 | WRDSF_WS | WRDSF_QUOTE
1612 | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0),
1613 0);
1614 free (value);
1615 if (rc)
1616 {
1617 _wsplt_seterr_sub (wsp, &ws);
1618 wordsplit_free (&ws);
1619 return 1;
1620 }
1621 wsnode_insert (wsp, ws.ws_head, *ptail, 0);
1622 *ptail = ws.ws_tail;
1623 ws.ws_head = ws.ws_tail = NULL;
1624 wordsplit_free (&ws);
1625 }
1626 }
1627 else
1628 {
8378991c
PE
1629 newnode = wsnode_new (wsp);
1630 if (!newnode)
c7b3f021
SP
1631 return 1;
1632 wsnode_insert (wsp, newnode, *ptail, 0);
1633 *ptail = newnode;
1634 newnode->flags = _WSNF_NULL;
1635 }
1636 return 0;
1637}
1638
1639static int
1640wordsplit_cmdexp (struct wordsplit *wsp)
1641{
1642 struct wordsplit_node *p;
1643
7b5e8039
SP
1644 for (p = wsp->ws_head; p;)
1645 {
1646 struct wordsplit_node *next = p->next;
1647 if (!(p->flags & _WSNF_NOEXPAND))
c7b3f021 1648 if (node_expand (wsp, p, begin_cmd_p, expcmd))
7b5e8039
SP
1649 return 1;
1650 p = next;
1651 }
1652
1653 wsnode_nullelim (wsp);
1654 return 0;
1655}
1656\f
1657/* Strip off any leading and trailing whitespace. This function is called
1658 right after the initial scanning, therefore it assumes that every
1659 node in the list is a text reference node. */
c7b3f021 1660static int
7b5e8039
SP
1661wordsplit_trimws (struct wordsplit *wsp)
1662{
1663 struct wordsplit_node *p;
1664
1665 for (p = wsp->ws_head; p; p = p->next)
1666 {
1667 size_t n;
1668
c7b3f021
SP
1669 if (!(p->flags & _WSNF_QUOTE))
1670 {
1671 /* Skip leading whitespace: */
1672 for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
1673 n++)
1674 ;
1675 p->v.segm.beg = n;
1676 }
8378991c 1677
c7b3f021
SP
1678 while (p->next && (p->flags & _WSNF_JOIN))
1679 p = p->next;
8378991c 1680
7b5e8039
SP
1681 if (p->flags & _WSNF_QUOTE)
1682 continue;
8378991c 1683
7b5e8039
SP
1684 /* Trim trailing whitespace */
1685 for (n = p->v.segm.end;
1686 n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
1687 p->v.segm.end = n;
1688 if (p->v.segm.beg == p->v.segm.end)
1689 p->flags |= _WSNF_NULL;
1690 }
1691
1692 wsnode_nullelim (wsp);
c7b3f021
SP
1693 return 0;
1694}
1695\f
1696static int
1697wordsplit_tildexpand (struct wordsplit *wsp)
1698{
1699 struct wordsplit_node *p;
1700 char *uname = NULL;
1701 size_t usize = 0;
8378991c 1702
c7b3f021
SP
1703 for (p = wsp->ws_head; p; p = p->next)
1704 {
1705 const char *str;
1706
1707 if (p->flags & _WSNF_QUOTE)
1708 continue;
1709
1710 str = wsnode_ptr (wsp, p);
1711 if (str[0] == '~')
1712 {
1713 size_t i, size, dlen;
1714 size_t slen = wsnode_len (p);
1715 struct passwd *pw;
1716 char *newstr;
8378991c 1717
c7b3f021
SP
1718 for (i = 1; i < slen && str[i] != '/'; i++)
1719 ;
1720 if (i == slen)
1721 continue;
1722 if (i > 1)
1723 {
1724 if (i > usize)
1725 {
1726 char *p = realloc (uname, i);
1727 if (!p)
1728 {
1729 free (uname);
1730 return _wsplt_nomem (wsp);
1731 }
1732 uname = p;
1733 usize = i;
1734 }
1735 --i;
1736 memcpy (uname, str + 1, i);
1737 uname[i] = 0;
1738 pw = getpwnam (uname);
1739 }
1740 else
1741 pw = getpwuid (getuid ());
1742
1743 if (!pw)
1744 continue;
1745
1746 dlen = strlen (pw->pw_dir);
1747 size = slen - i + dlen;
1748 newstr = malloc (size);
1749 if (!newstr)
1750 {
1751 free (uname);
1752 return _wsplt_nomem (wsp);
1753 }
1754 --size;
1755
1756 memcpy (newstr, pw->pw_dir, dlen);
1757 memcpy (newstr + dlen, str + i + 1, slen - i - 1);
1758 newstr[size] = 0;
1759 if (p->flags & _WSNF_WORD)
1760 free (p->v.word);
1761 p->v.word = newstr;
1762 p->flags |= _WSNF_WORD;
1763 }
1764 }
1765 free (uname);
1766 return 0;
1767}
1768\f
/* Return 1 if the first L bytes of S contain a globbing metacharacter
   (`*', `?' or `['), 0 otherwise.  A non-positive L yields 0.

   NUL bytes are skipped explicitly: strchr would otherwise match them
   against the terminator of the metacharacter string. */
static int
isglob (const char *s, int l)
{
  while (l-- > 0)
    {
      char c = *s++;
      if (c != '\0' && strchr ("*?[", c) != NULL)
        return 1;
    }
  return 0;
}
1779
1780static int
1781wordsplit_pathexpand (struct wordsplit *wsp)
1782{
1783 struct wordsplit_node *p, *next;
c7b3f021
SP
1784 size_t slen;
1785 int flags = 0;
1786
1787#ifdef GLOB_PERIOD
1788 if (wsp->ws_options & WRDSO_DOTGLOB)
1789 flags = GLOB_PERIOD;
1790#endif
8378991c 1791
c7b3f021
SP
1792 for (p = wsp->ws_head; p; p = next)
1793 {
1794 const char *str;
1795
1796 next = p->next;
1797
1798 if (p->flags & _WSNF_QUOTE)
1799 continue;
1800
1801 str = wsnode_ptr (wsp, p);
1802 slen = wsnode_len (p);
1803
1804 if (isglob (str, slen))
1805 {
1806 int i;
1807 glob_t g;
1808 struct wordsplit_node *prev;
9042dfc4 1809 char *pattern;
8378991c 1810
9042dfc4
SP
1811 pattern = malloc (slen + 1);
1812 if (!pattern)
1813 return _wsplt_nomem (wsp);
c7b3f021
SP
1814 memcpy (pattern, str, slen);
1815 pattern[slen] = 0;
8378991c 1816
c7b3f021
SP
1817 switch (glob (pattern, flags, NULL, &g))
1818 {
1819 case 0:
9042dfc4 1820 free (pattern);
c7b3f021 1821 break;
8378991c 1822
c7b3f021
SP
1823 case GLOB_NOSPACE:
1824 free (pattern);
1825 return _wsplt_nomem (wsp);
8378991c 1826
c7b3f021
SP
1827 case GLOB_NOMATCH:
1828 if (wsp->ws_options & WRDSO_NULLGLOB)
1829 {
1830 wsnode_remove (wsp, p);
1831 wsnode_free (p);
1832 }
1833 else if (wsp->ws_options & WRDSO_FAILGLOB)
1834 {
1835 char buf[128];
1836 if (wsp->ws_errno == WRDSE_USERERR)
1837 free (wsp->ws_usererr);
1838 snprintf (buf, sizeof (buf), _("no files match pattern %s"),
1839 pattern);
1840 free (pattern);
1841 wsp->ws_usererr = strdup (buf);
1842 if (!wsp->ws_usererr)
1843 return _wsplt_nomem (wsp);
1844 else
1845 return _wsplt_seterr (wsp, WRDSE_USERERR);
1846 }
9042dfc4 1847 free (pattern);
c7b3f021 1848 continue;
8378991c 1849
c7b3f021
SP
1850 default:
1851 free (pattern);
1852 return _wsplt_seterr (wsp, WRDSE_GLOBERR);
1853 }
1854
1855 prev = p;
1856 for (i = 0; i < g.gl_pathc; i++)
1857 {
8378991c 1858 struct wordsplit_node *newnode = wsnode_new (wsp);
c7b3f021 1859 char *newstr;
8378991c
PE
1860
1861 if (!newnode)
c7b3f021
SP
1862 return 1;
1863 newstr = strdup (g.gl_pathv[i]);
1864 if (!newstr)
9042dfc4
SP
1865 {
1866 wsnode_free (newnode);
1867 return _wsplt_nomem (wsp);
1868 }
c7b3f021
SP
1869 newnode->v.word = newstr;
1870 newnode->flags |= _WSNF_WORD|_WSNF_QUOTE;
1871 wsnode_insert (wsp, newnode, prev, 0);
1872 prev = newnode;
1873 }
1874 globfree (&g);
1875
1876 wsnode_remove (wsp, p);
1877 wsnode_free (p);
1878 }
1879 }
c7b3f021 1880 return 0;
7b5e8039
SP
1881}
1882\f
/* Skip a sequence of sed-style substitution expressions
   (`s<delim>...<delim>...<delim>[flags]', optionally chained with `;')
   that starts at index I of COMMAND, whose length is LEN.

   Within an expression a backslash hides the character following it.
   After the third delimiter, alphanumeric characters are taken as flags.

   Return the index of the first character not belonging to the sequence.
   The result never exceeds LEN. */
static size_t
skip_sed_expr (const char *command, size_t i, size_t len)
{
  int state = 0;        /* number of delimiters seen in the expression */

  do
    {
      int delim;

      if (i < len && command[i] == ';')
        i++;
      /* Check the bound before looking at the characters. */
      if (!(i + 3 < len && command[i] == 's' && ISPUNCT (command[i + 1])))
        break;

      delim = command[++i];
      state = 1;
      for (i++; i < len; i++)
        {
          if (state == 3)
            {
              /* Past the closing delimiter: consume the flags. */
              if (command[i] == delim || !ISALNUM (command[i]))
                break;
            }
          else if (command[i] == '\\')
            i++;
          else if (command[i] == delim)
            state++;
        }
    }
  while (state == 3 && i < len && command[i] == ';');

  /* A backslash in the last position pushes I one past the end. */
  return i > len ? len : i;
}
1915
c7b3f021
SP
1916/* wsp->ws_endp points to a delimiter character. If RETURN_DELIMS
1917 is true, return its value, otherwise return the index past it. */
7fb1b687 1918static size_t
c7b3f021 1919skip_delim_internal (struct wordsplit *wsp, int return_delims)
7b5e8039 1920{
c7b3f021
SP
1921 return return_delims ? wsp->ws_endp : wsp->ws_endp + 1;
1922}
7b5e8039 1923
/* Return the position to resume scanning at, taking the delimiter
   handling mode from WSP_RETURN_DELIMS. */
static size_t
skip_delim (struct wordsplit *wsp)
{
  int keep_delims = WSP_RETURN_DELIMS (wsp);

  return skip_delim_internal (wsp, keep_delims);
}
7b5e8039 1929
7fb1b687 1930static size_t
c7b3f021
SP
1931skip_delim_real (struct wordsplit *wsp)
1932{
1933 return skip_delim_internal (wsp, wsp->ws_flags & WRDSF_RETURN_DELIMS);
7b5e8039
SP
1934}
1935
/* Return codes of the scanning functions. */
#define _WRDS_EOF 0   /* nothing more to scan */
#define _WRDS_OK  1   /* a word was scanned; scanning may continue */
#define _WRDS_ERR 2   /* an error occurred */
1939
1940static int
c7b3f021 1941scan_qstring (struct wordsplit *wsp, size_t start, size_t *end)
7b5e8039
SP
1942{
1943 size_t j;
1944 const char *command = wsp->ws_input;
1945 size_t len = wsp->ws_len;
1946 char q = command[start];
1947
1948 for (j = start + 1; j < len && command[j] != q; j++)
1949 if (q == '"' && command[j] == '\\')
1950 j++;
1951 if (j < len && command[j] == q)
1952 {
e5474174 1953 unsigned flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
7b5e8039
SP
1954 if (q == '\'')
1955 flags |= _WSNF_NOEXPAND;
1956 if (wordsplit_add_segm (wsp, start + 1, j, flags))
1957 return _WRDS_ERR;
1958 *end = j;
1959 }
1960 else
1961 {
1962 wsp->ws_endp = start;
c7b3f021 1963 _wsplt_seterr (wsp, WRDSE_QUOTE);
7b5e8039
SP
1964 return _WRDS_ERR;
1965 }
1966 return 0;
1967}
1968
1969static int
c7b3f021 1970scan_word (struct wordsplit *wsp, size_t start, int consume_all)
7b5e8039
SP
1971{
1972 size_t len = wsp->ws_len;
1973 const char *command = wsp->ws_input;
1974 const char *comment = wsp->ws_comment;
1975 int join = 0;
e5474174 1976 unsigned flags = 0;
c7b3f021 1977 struct wordsplit_node *np = wsp->ws_tail;
8378991c 1978
7b5e8039
SP
1979 size_t i = start;
1980
1981 if (i >= len)
1982 {
1983 wsp->ws_errno = WRDSE_EOF;
1984 return _WRDS_EOF;
1985 }
1986
1987 start = i;
1988
1989 if (wsp->ws_flags & WRDSF_SED_EXPR
1990 && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
1991 {
1992 flags = _WSNF_SEXP;
1993 i = skip_sed_expr (command, i, len);
1994 }
c7b3f021 1995 else if (consume_all || !ISDELIM (wsp, command[i]))
7b5e8039
SP
1996 {
1997 while (i < len)
1998 {
1999 if (comment && strchr (comment, command[i]) != NULL)
2000 {
2001 size_t j;
2002 for (j = i + 1; j < len && command[j] != '\n'; j++)
2003 ;
2004 if (wordsplit_add_segm (wsp, start, i, 0))
2005 return _WRDS_ERR;
2006 wsp->ws_endp = j;
2007 return _WRDS_OK;
2008 }
2009
2010 if (wsp->ws_flags & WRDSF_QUOTE)
2011 {
2012 if (command[i] == '\\')
2013 {
2014 if (++i == len)
2015 break;
2016 i++;
2017 continue;
2018 }
2019
2020 if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
2021 ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
2022 {
2023 if (join && wsp->ws_tail)
2024 wsp->ws_tail->flags |= _WSNF_JOIN;
2025 if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
2026 return _WRDS_ERR;
2027 if (scan_qstring (wsp, i, &i))
2028 return _WRDS_ERR;
2029 start = i + 1;
2030 join = 1;
2031 }
2032 }
2033
c7b3f021
SP
2034 if (command[i] == '$')
2035 {
2036 if (!(wsp->ws_flags & WRDSF_NOVAR)
2037 && command[i+1] == '{'
2038 && find_closing_paren (command, i + 2, len, &i, "{}") == 0)
2039 continue;
2040 if (!(wsp->ws_flags & WRDSF_NOCMD)
2041 && command[i+1] == '('
2042 && find_closing_paren (command, i + 2, len, &i, "()") == 0)
2043 continue;
2044 }
2045
2046 if (!consume_all && ISDELIM (wsp, command[i]))
7b5e8039
SP
2047 break;
2048 else
2049 i++;
2050 }
2051 }
c7b3f021 2052 else if (WSP_RETURN_DELIMS (wsp))
7b5e8039
SP
2053 {
2054 i++;
c7b3f021 2055 flags |= _WSNF_DELIM;
7b5e8039
SP
2056 }
2057 else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
2058 flags |= _WSNF_EMPTYOK;
2059
2060 if (join && i > start && wsp->ws_tail)
2061 wsp->ws_tail->flags |= _WSNF_JOIN;
2062 if (wordsplit_add_segm (wsp, start, i, flags))
2063 return _WRDS_ERR;
2064 wsp->ws_endp = i;
2065 if (wsp->ws_flags & WRDSF_INCREMENTAL)
2066 return _WRDS_EOF;
8378991c 2067
c7b3f021 2068 if (consume_all)
7b5e8039 2069 {
c7b3f021
SP
2070 if (!np)
2071 np = wsp->ws_head;
2072 while (np)
2073 {
2074 np->flags |= _WSNF_QUOTE;
2075 np = np->next;
2076 }
7b5e8039 2077 }
8378991c 2078
c7b3f021 2079 return _WRDS_OK;
7b5e8039
SP
2080}
2081
/* Return the numeric value of the character C taken as a digit
   (`0'-`9', `a'-`f' or `A'-`F'), or 255 if C is not such a digit.
   In particular, the NUL character yields 255.
   NOTE: C is evaluated more than once. */
#define to_num(c)                                       \
  (('0' <= (c) && (c) <= '9') ? (c) - '0'               \
   : ('a' <= (c) && (c) <= 'f') ? (c) - 'a' + 10        \
   : ('A' <= (c) && (c) <= 'F') ? (c) - 'A' + 10        \
   : 255)

/* Convert at most CNT leading characters of SRC to a number in the given
   BASE.  Conversion stops at the first character that is not a digit
   valid in BASE (this includes the terminating NUL and any byte above
   127).  The result is stored in *PVAL; it is 0 if nothing was converted.
   Return the number of characters converted. */
static int
xtonum (int *pval, const char *src, int base, int cnt)
{
  int i, val;

  for (i = 0, val = 0; i < cnt; i++, src++)
    {
      int n = *(const unsigned char *) src;
      if (n > 127 || (n = to_num (n)) >= base)
        break;
      val = val * base + n;
    }
  *pval = val;
  return i;
}
2100
/* Return the number of bytes needed to represent STR in quoted form,
   not counting the terminating NUL.  The accounting per character is:

     space                              1
     double quote                       2
     other printable character
       (except tab and backslash)       1
     anything else, if QUOTE_HEX        3
     anything else, otherwise           2 if wordsplit_c_quote_char knows
                                        an escape for it, 4 if not

   *QUOTE is set to 1 if STR contains a space or a double quote, and to 0
   otherwise. */
size_t
wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
{
  size_t total = 0;
  const char *cp;

  *quote = 0;
  for (cp = str; *cp; cp++)
    {
      switch (*cp)
        {
        case ' ':
          *quote = 1;
          total += 1;
          continue;

        case '"':
          *quote = 1;
          total += 2;
          continue;
        }

      if (*cp != '\t' && *cp != '\\' && ISPRINT (*cp))
        total += 1;
      else if (quote_hex)
        total += 3;
      else if (wordsplit_c_quote_char (*cp))
        total += 2;
      else
        total += 4;
    }
  return total;
}
2130
c7b3f021
SP
/* TRANSTAB is a string made of character pairs: an escape letter
   followed by the character it stands for.  Look up C among the escape
   letters and return the corresponding character, or 0 if C is not
   found.  An incomplete trailing pair is ignored. */
static int
wsplt_unquote_char (const char *transtab, int c)
{
  const char *pair;

  for (pair = transtab; pair[0] && pair[1]; pair += 2)
    {
      if (pair[0] == c)
        return pair[1];
    }
  return 0;
}
2142
c7b3f021
SP
/* The reverse of wsplt_unquote_char.  TRANSTAB is a string made of
   character pairs: an escape letter followed by the character it stands
   for.  Look up C among the represented characters and return its escape
   letter, or 0 if C is not found.  An incomplete trailing pair is
   ignored. */
static int
wsplt_quote_char (const char *transtab, int c)
{
  while (transtab[0] != '\0' && transtab[1] != '\0')
    {
      if (c == transtab[1])
        return transtab[0];
      transtab += 2;
    }
  return 0;
}
2153
2154int
2155wordsplit_c_unquote_char (int c)
2156{
2157 return wsplt_unquote_char (wordsplit_c_escape_tab, c);
2158}
2159
2160int
2161wordsplit_c_quote_char (int c)
2162{
2163 return wsplt_quote_char (wordsplit_c_escape_tab, c);
7b5e8039
SP
2164}
2165
2166void
c7b3f021
SP
2167wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
2168 char *dst, const char *src, size_t n)
7b5e8039
SP
2169{
2170 int i = 0;
2171 int c;
2172
c7b3f021 2173 inquote = !!inquote;
7b5e8039
SP
2174 while (i < n)
2175 {
2176 if (src[i] == '\\')
2177 {
2178 ++i;
c7b3f021
SP
2179 if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
2180 && (src[i] == 'x' || src[i] == 'X'))
7b5e8039
SP
2181 {
2182 if (n - i < 2)
2183 {
2184 *dst++ = '\\';
2185 *dst++ = src[i++];
2186 }
2187 else
2188 {
2189 int off = xtonum (&c, src + i + 1,
2190 16, 2);
2191 if (off == 0)
2192 {
2193 *dst++ = '\\';
2194 *dst++ = src[i++];
2195 }
2196 else
2197 {
2198 *dst++ = c;
2199 i += off + 1;
2200 }
2201 }
2202 }
c7b3f021
SP
2203 else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
2204 && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
7b5e8039
SP
2205 {
2206 if (n - i < 1)
2207 {
2208 *dst++ = '\\';
2209 *dst++ = src[i++];
2210 }
2211 else
2212 {
2213 int off = xtonum (&c, src + i, 8, 3);
2214 if (off == 0)
2215 {
2216 *dst++ = '\\';
2217 *dst++ = src[i++];
2218 }
2219 else
2220 {
2221 *dst++ = c;
2222 i += off;
2223 }
2224 }
2225 }
c7b3f021
SP
2226 else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
2227 {
2228 *dst++ = c;
2229 ++i;
2230 }
7b5e8039 2231 else
c7b3f021
SP
2232 {
2233 if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
2234 *dst++ = '\\';
2235 *dst++ = src[i++];
2236 }
7b5e8039
SP
2237 }
2238 else
2239 *dst++ = src[i++];
2240 }
2241 *dst = 0;
2242}
2243
/* Copy SRC to DST in quoted form:

     - a double quote is preceded by a backslash;
     - other printable characters, except tab and backslash, are copied
       verbatim;
     - any other byte is written as `%XX' (two upper-case hex digits) if
       QUOTE_HEX is non-zero; otherwise as a backslash followed by the
       escape letter given by wordsplit_c_quote_char or, lacking that, by
       three octal digits.

   The output is NOT NUL-terminated by this function. */
void
wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
{
  char *out = dst;
  const char *in;

  for (in = src; *in; in++)
    {
      char digits[4];
      int esc;

      if (*in == '"')
        {
          *out++ = '\\';
          *out++ = *in;
          continue;
        }

      if (*in != '\t' && *in != '\\' && ISPRINT (*in))
        {
          *out++ = *in;
          continue;
        }

      if (quote_hex)
        {
          snprintf (digits, sizeof digits, "%%%02X",
                    *(const unsigned char *) in);
          memcpy (out, digits, 3);
          out += 3;
          continue;
        }

      esc = wordsplit_c_quote_char (*in);
      *out++ = '\\';
      if (esc)
        *out++ = esc;
      else
        {
          snprintf (digits, sizeof digits, "%03o",
                    *(const unsigned char *) in);
          memcpy (out, digits, 3);
          out += 3;
        }
    }
}
2282
c7b3f021
SP
2283
/* Description of a single expansion phase. */
struct exptab
{
  /* Textual description (for debugging).  A NULL value marks the end of
     the table. */
  const char *descr;
  /* WRDSF_ bit(s) that control this phase. */
  int flag;
  /* Entry-specific options (see the EXPOPT_ flags below). */
  int opt;
  /* Expansion function; may be NULL. */
  int (*expansion) (struct wordsplit *wsp);
};
2292
/* Options that control how an exptab entry is applied: */

/* By default an entry is run if its flag bit is set in struct wordsplit.
   EXPOPT_NEG inverts the test: the entry is run if its flag bit is NOT
   set in struct wordsplit. */
#define EXPOPT_NEG      0x01
/* The entry matches only if all bits of its flag are set. */
#define EXPORT_ALLOF    0x02
/* Coalesce the node list before running the expansion. */
#define EXPOPT_COALESCE 0x04
2302
2303static struct exptab exptab[] = {
2304 { N_("WS trimming"), WRDSF_WS, 0,
2305 wordsplit_trimws },
2306 { N_("command substitution"), WRDSF_NOCMD, EXPOPT_NEG|EXPOPT_COALESCE,
2307 wordsplit_cmdexp },
2308 { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
2309 NULL },
2310 { N_("tilde expansion"), WRDSF_PATHEXPAND, 0,
2311 wordsplit_tildexpand },
2312 { N_("variable expansion"), WRDSF_NOVAR, EXPOPT_NEG,
2313 wordsplit_varexp },
2314 { N_("quote removal"), 0, EXPOPT_NEG,
2315 wsnode_quoteremoval },
2316 { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
2317 NULL },
2318 { N_("path expansion"), WRDSF_PATHEXPAND, 0,
2319 wordsplit_pathexpand },
2320 { NULL }
2321};
2322
7fb1b687
PE
2323static int
2324exptab_matches (struct exptab *p, struct wordsplit *wsp)
c7b3f021
SP
2325{
2326 int result;
2327
2328 result = (wsp->ws_flags & p->flag);
2329 if (p->opt & EXPORT_ALLOF)
2330 result = result == p->flag;
2331 if (p->opt & EXPOPT_NEG)
2332 result = !result;
2333
2334 return result;
2335}
2336
7b5e8039
SP
2337static int
2338wordsplit_process_list (struct wordsplit *wsp, size_t start)
2339{
c7b3f021
SP
2340 struct exptab *p;
2341
2342 if (wsp->ws_flags & WRDSF_SHOWDBG)
2343 wsp->ws_debug (_("(%02d) Input:%.*s;"),
2344 wsp->ws_lvl, (int) wsp->ws_len, wsp->ws_input);
8378991c 2345
c7b3f021
SP
2346 if ((wsp->ws_flags & WRDSF_NOSPLIT)
2347 || ((wsp->ws_options & WRDSO_MAXWORDS)
2348 && wsp->ws_wordi + 1 == wsp->ws_maxwords))
2349 {
2350 /* Treat entire input as a single word */
2351 if (scan_word (wsp, start, 1) == _WRDS_ERR)
7b5e8039
SP
2352 return wsp->ws_errno;
2353 }
2354 else
2355 {
2356 int rc;
2357
c7b3f021 2358 while ((rc = scan_word (wsp, start, 0)) == _WRDS_OK)
7b5e8039
SP
2359 start = skip_delim (wsp);
2360 /* Make sure tail element is not joinable */
2361 if (wsp->ws_tail)
2362 wsp->ws_tail->flags &= ~_WSNF_JOIN;
2363 if (rc == _WRDS_ERR)
2364 return wsp->ws_errno;
2365 }
2366
2367 if (wsp->ws_flags & WRDSF_SHOWDBG)
2368 {
c7b3f021 2369 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _("Initial list:"));
7b5e8039
SP
2370 wordsplit_dump_nodes (wsp);
2371 }
2372
c7b3f021 2373 for (p = exptab; p->descr; p++)
7b5e8039 2374 {
c7b3f021 2375 if (exptab_matches(p, wsp))
7b5e8039 2376 {
c7b3f021
SP
2377 if (p->opt & EXPOPT_COALESCE)
2378 {
2379 if (wsnode_coalesce (wsp))
2380 break;
2381 if (wsp->ws_flags & WRDSF_SHOWDBG)
2382 {
2383 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl,
2384 _("Coalesced list:"));
2385 wordsplit_dump_nodes (wsp);
2386 }
2387 }
2388 if (p->expansion)
2389 {
2390 if (p->expansion (wsp))
2391 break;
2392 if (wsp->ws_flags & WRDSF_SHOWDBG)
2393 {
2394 wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _(p->descr));
2395 wordsplit_dump_nodes (wsp);
2396 }
2397 }
7b5e8039
SP
2398 }
2399 }
7b5e8039
SP
2400 return wsp->ws_errno;
2401}
2402
c7b3f021
SP
2403static int
2404wordsplit_run (const char *command, size_t length, struct wordsplit *wsp,
e5474174 2405 unsigned flags, int lvl)
7b5e8039
SP
2406{
2407 int rc;
2408 size_t start;
7b5e8039
SP
2409
2410 if (!command)
2411 {
2412 if (!(flags & WRDSF_INCREMENTAL))
c7b3f021
SP
2413 return _wsplt_seterr (wsp, WRDSE_USAGE);
2414
2415 if (wsp->ws_head)
2416 return wordsplit_finish (wsp);
7b5e8039 2417
c7b3f021 2418 start = skip_delim_real (wsp);
7b5e8039 2419 if (wsp->ws_endp == wsp->ws_len)
c7b3f021 2420 return _wsplt_seterr (wsp, WRDSE_NOINPUT);
7b5e8039 2421
7b5e8039
SP
2422 wsp->ws_flags |= WRDSF_REUSE;
2423 wordsplit_init0 (wsp);
2424 }
2425 else
2426 {
7b5e8039 2427 start = 0;
c7b3f021 2428 rc = wordsplit_init (wsp, command, length, flags);
7b5e8039
SP
2429 if (rc)
2430 return rc;
c7b3f021 2431 wsp->ws_lvl = lvl;
7b5e8039
SP
2432 }
2433
7b5e8039 2434 rc = wordsplit_process_list (wsp, start);
7b5e8039 2435 if (rc)
c7b3f021
SP
2436 return rc;
2437 return wordsplit_finish (wsp);
2438}
2439
/* Split LENGTH bytes of COMMAND into words according to FLAGS, using WSP
   for both the settings and the result.  This runs the splitter at
   nesting level 0.  Return 0 on success or an error code. */
int
wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
               unsigned flags)
{
  int rc = wordsplit_run (command, length, wsp, flags, 0);

  return rc;
}
2446
/* Split the NUL-terminated string COMMAND into words.  This is the same
   as wordsplit_len with the length computed by strlen; a NULL COMMAND is
   passed on with a length of 0. */
int
wordsplit (const char *command, struct wordsplit *ws, unsigned flags)
{
  size_t length = 0;

  if (command)
    length = strlen (command);
  return wordsplit_len (command, length, ws, flags);
}
2452
2453void
2454wordsplit_free_words (struct wordsplit *ws)
2455{
2456 size_t i;
2457
2458 for (i = 0; i < ws->ws_wordc; i++)
2459 {
2460 char *p = ws->ws_wordv[ws->ws_offs + i];
2461 if (p)
2462 {
2463 free (p);
2464 ws->ws_wordv[ws->ws_offs + i] = NULL;
2465 }
2466 }
2467 ws->ws_wordc = 0;
2468}
2469
c7b3f021
SP
2470void
2471wordsplit_free_envbuf (struct wordsplit *ws)
2472{
2473 if (ws->ws_flags & WRDSF_NOCMD)
2474 return;
2475 if (ws->ws_envbuf)
2476 {
2477 size_t i;
2478
2479 for (i = 0; ws->ws_envbuf[i]; i++)
2480 free (ws->ws_envbuf[i]);
2481 free (ws->ws_envbuf);
2482 ws->ws_envidx = ws->ws_envsiz = 0;
2483 ws->ws_envbuf = NULL;
2484 }
2485}
2486
2487void
2488wordsplit_clearerr (struct wordsplit *ws)
2489{
2490 if (ws->ws_errno == WRDSE_USERERR)
2491 free (ws->ws_usererr);
2492 ws->ws_usererr = NULL;
2493 ws->ws_errno = WRDSE_OK;
2494}
2495
7b5e8039
SP
2496void
2497wordsplit_free (struct wordsplit *ws)
2498{
c7b3f021 2499 wordsplit_free_nodes (ws);
7b5e8039
SP
2500 wordsplit_free_words (ws);
2501 free (ws->ws_wordv);
2502 ws->ws_wordv = NULL;
c7b3f021 2503 wordsplit_free_envbuf (ws);
7b5e8039
SP
2504}
2505
c7b3f021
SP
2506int
2507wordsplit_get_words (struct wordsplit *ws, size_t *wordc, char ***wordv)
7b5e8039 2508{
c7b3f021
SP
2509 char **p = realloc (ws->ws_wordv,
2510 (ws->ws_wordc + 1) * sizeof (ws->ws_wordv[0]));
2511 if (!p)
2512 return -1;
2513 *wordv = p;
2514 *wordc = ws->ws_wordc;
7b5e8039 2515
c7b3f021
SP
2516 ws->ws_wordv = NULL;
2517 ws->ws_wordc = 0;
2518 ws->ws_wordn = 0;
7b5e8039 2519
c7b3f021 2520 return 0;
7b5e8039
SP
2521}
2522
2523const char *_wordsplit_errstr[] = {
2524 N_("no error"),
2525 N_("missing closing quote"),
2526 N_("memory exhausted"),
7b5e8039
SP
2527 N_("invalid wordsplit usage"),
2528 N_("unbalanced curly brace"),
2529 N_("undefined variable"),
c7b3f021
SP
2530 N_("input exhausted"),
2531 N_("unbalanced parenthesis"),
2532 N_("globbing error")
7b5e8039
SP
2533};
2534int _wordsplit_nerrs =
2535 sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
2536
2537const char *
2538wordsplit_strerror (struct wordsplit *ws)
2539{
c7b3f021
SP
2540 if (ws->ws_errno == WRDSE_USERERR)
2541 return ws->ws_usererr;
7b5e8039
SP
2542 if (ws->ws_errno < _wordsplit_nerrs)
2543 return _wordsplit_errstr[ws->ws_errno];
2544 return N_("unknown error");
2545}
c7b3f021
SP
2546
2547void
2548wordsplit_perror (struct wordsplit *wsp)
2549{
2550 switch (wsp->ws_errno)
2551 {
2552 case WRDSE_QUOTE:
2553 wsp->ws_error (_("missing closing %c (start near #%lu)"),
2554 wsp->ws_input[wsp->ws_endp],
2555 (unsigned long) wsp->ws_endp);
2556 break;
2557
2558 default:
2559 wsp->ws_error ("%s", wordsplit_strerror (wsp));
2560 }
2561}