]> git.ipfire.org Git - thirdparty/bash.git/blame - examples/loadables/cut.c
bash-5.2 distribution sources and documentation
[thirdparty/bash.git] / examples / loadables / cut.c
CommitLineData
8868edaf
CR
1/* cut,lcut - extract specified fields from a line and assign them to an array
2 or print them to the standard output */
3
4/*
5 Copyright (C) 2020 Free Software Foundation, Inc.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
20
21/* See Makefile for compilation details. */
22
23#include <config.h>
24
25#if defined (HAVE_UNISTD_H)
26# include <unistd.h>
27#endif
28#include "bashansi.h"
29#include <stdio.h>
30#include <fcntl.h>
31#include <errno.h>
32
33#include "loadables.h"
34#include "shmbutil.h"
35
/* Name of the default array variable. */
#define CUT_ARRAY_DEFAULT	"CUTFIELDS"

/* Sentinel for an unset startpos/endpos. */
#define NOPOS		(-2)

/* Special values stored in a struct cutpos. */
#define BOL		0		/* range starts at the beginning of the line */
#define EOL		INT_MAX		/* range extends to the end of the line */
#define NORANGE		(-1)		/* just a position, no range */

/* Bits recording which command-line options were supplied. */
#define BFLAG		0x01		/* -b */
#define CFLAG		0x02		/* -c */
#define DFLAG		0x04		/* -d */
#define FFLAG		0x08		/* -f */
#define SFLAG		0x10		/* -s */
49
/* One element of a parsed LIST: a single position or a START-END range.
   Both members are zero-based; getlist() converts from the 1-based values
   the user supplies. */
struct cutpos
{
  int startpos;		/* first selected position, or BOL */
  int endpos;		/* last selected position, or EOL/NORANGE */
};
54
/* Everything the per-line cut functions need to know about one invocation. */
struct cutop
{
  int flags;			/* BFLAG, CFLAG, DFLAG, FFLAG, SFLAG bits */
  int delim;			/* field delimiter character for -f */
  int npos;			/* number of entries in POSLIST */
  struct cutpos *poslist;	/* positions/ranges, as filled in by getlist() */
};
62
63static int
64poscmp (a, b)
65 void *a, *b;
66{
67 struct cutpos *p1, *p2;
68
69 p1 = (struct cutpos *)a;
70 p2 = (struct cutpos *)b;
71 return (p1->startpos - p2->startpos);
72}
73
74static int
75getlist (arg, opp)
76 char *arg;
77 struct cutpos **opp;
78{
79 char *ntok, *ltok, *larg;
80 int s, e;
81 intmax_t num;
82 struct cutpos *poslist;
83 int npos, nsize;
84
85 poslist = 0;
86 nsize = npos = 0;
87 s = e = 0;
88 larg = arg;
89 while (ltok = strsep (&larg, ","))
90 {
91 if (*ltok == 0)
92 continue;
93
94 ntok = strsep (&ltok, "-");
95 if (*ntok == 0)
96 s = BOL;
97 else
98 {
99 if (legal_number (ntok, &num) == 0 || (int)num != num || num <= 0)
100 {
101 builtin_error ("%s: invalid list value", ntok);
102 *opp = poslist;
103 return -1;
104 }
105 s = num;
106 s--; /* fields are 1-based */
107 }
108 if (ltok == 0)
109 e = NORANGE;
110 else if (*ltok == 0)
111 e = EOL;
112 else
113 {
114 if (legal_number (ltok, &num) == 0 || (int)num != num || num <= 0)
115 {
116 builtin_error ("%s: invalid list value", ltok);
117 *opp = poslist;
118 return -1;
119 }
120 e = num;
121 e--;
122 if (e == s)
123 e = NORANGE;
124 }
125
126 if (npos == nsize)
127 {
128 nsize += 4;
129 poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof (struct cutpos));
130 }
131 poslist[npos].startpos = s;
132 poslist[npos].endpos = e;
133 npos++;
134 }
135 if (npos == 0)
136 {
137 builtin_error ("missing list of positions");
138 *opp = poslist;
139 return -1;
140 }
141
142 qsort (poslist, npos, sizeof(poslist[0]), poscmp);
143 *opp = poslist;
144
145 return npos;
146}
147
148static int
149cutbytes (v, line, ops)
150 SHELL_VAR *v;
151 char *line;
152 struct cutop *ops;
153{
154 arrayind_t ind;
155 char *buf, *bmap;
156 size_t llen;
157 int i, b, n, s, e;
158
159 llen = strlen (line);
160 buf = xmalloc (llen + 1);
161 bmap = xmalloc (llen + 1);
162 memset (bmap, 0, llen);
163
164 for (n = 0; n < ops->npos; n++)
165 {
166 s = ops->poslist[n].startpos; /* no translation needed yet */
167 e = ops->poslist[n].endpos;
168 if (e == NORANGE)
169 e = s;
170 else if (e == EOL || e >= llen)
171 e = llen - 1;
172 /* even if a column is specified multiple times, it will only be printed
173 once */
174 for (i = s; i <= e; i++)
175 bmap[i] = 1;
176 }
177
178 b = 0;
179 for (i = 0; i < llen; i++)
180 if (bmap[i])
181 buf[b++] = line[i];
182 buf[b] = 0;
183
184 if (v)
185 {
186 ind = 0;
187 bind_array_element (v, ind, buf, 0);
188 ind++;
189 }
190 else
191 printf ("%s\n", buf);
192
193 free (buf);
194 free (bmap);
195
196 return ind;
197}
198
199static int
200cutchars (v, line, ops)
201 SHELL_VAR *v;
202 char *line;
203 struct cutop *ops;
204{
205 arrayind_t ind;
206 char *buf, *bmap;
207 wchar_t *wbuf, *wb2;
208 size_t llen, wlen;
209 int i, b, n, s, e;
210
211 if (MB_CUR_MAX == 1)
212 return (cutbytes (v, line, ops));
213 if (locale_utf8locale && utf8_mbsmbchar (line) == 0)
214 return (cutbytes (v, line, ops));
215
216 llen = strlen (line);
217 wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t));
218
219 wlen = mbstowcs (wbuf, line, llen);
220 if (MB_INVALIDCH (wlen))
221 {
222 free (wbuf);
223 return (cutbytes (v, line, ops));
224 }
225
226 bmap = xmalloc (llen + 1);
227 memset (bmap, 0, llen);
228
229 for (n = 0; n < ops->npos; n++)
230 {
231 s = ops->poslist[n].startpos; /* no translation needed yet */
232 e = ops->poslist[n].endpos;
233 if (e == NORANGE)
234 e = s;
235 else if (e == EOL || e >= wlen)
236 e = wlen - 1;
237 /* even if a column is specified multiple times, it will only be printed
238 once */
239 for (i = s; i <= e; i++)
240 bmap[i] = 1;
241 }
242
243 wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t));
244 b = 0;
245 for (i = 0; i < wlen; i++)
246 if (bmap[i])
247 wb2[b++] = wbuf[i];
248 wb2[b] = 0;
249
250 free (wbuf);
251
252 buf = bmap;
253 n = wcstombs (buf, wb2, llen);
254
255 if (v)
256 {
257 ind = 0;
258 bind_array_element (v, ind, buf, 0);
259 ind++;
260 }
261 else
262 printf ("%s\n", buf);
263
264 free (buf);
265 free (wb2);
266
267 return ind;
268}
269
270/* The basic strategy is to cut the line into fields using strsep, populate
271 an array of fields from 0..nf, then select those fields using the same
272 bitmap approach as cut{bytes,chars} and assign them to the array variable
273 V or print them on stdout. This function obeys SFLAG. */
274static int
275cutfields (v, line, ops)
276 SHELL_VAR *v;
277 char *line;
278 struct cutop *ops;
279{
280 arrayind_t ind;
281 char *buf, *bmap, *field, **fields, delim[2];
282 size_t llen, fsize;
283 int i, b, n, s, e, nf;
284
285 ind = 0;
286
287 delim[0] = ops->delim;
288 delim[1] = '\0';
289
290 fields = 0;
291 nf = 0;
292 fsize = 0;
293
294 field = buf = line;
295 do
296 {
297 field = strsep (&buf, delim); /* destructive */
298 if (nf == fsize)
299 {
300 fsize += 8;
301 fields = xrealloc (fields, fsize * sizeof (char *));
302 }
303 fields[nf] = field;
304 if (field)
305 nf++;
306 }
307 while (field);
308
309 if (nf == 1)
310 {
311 free (fields);
312 if (ops->flags & SFLAG)
313 return ind;
314 if (v)
315 {
316 bind_array_element (v, ind, line, 0);
317 ind++;
318 }
319 else
320 printf ("%s\n", line);
321 return ind;
322 }
323
324 bmap = xmalloc (nf + 1);
325 memset (bmap, 0, nf);
326
327 for (n = 0; n < ops->npos; n++)
328 {
329 s = ops->poslist[n].startpos; /* no translation needed yet */
330 e = ops->poslist[n].endpos;
331 if (e == NORANGE)
332 e = s;
333 else if (e == EOL || e >= nf)
334 e = nf - 1;
335 /* even if a column is specified multiple times, it will only be printed
336 once */
337 for (i = s; i <= e; i++)
338 bmap[i] = 1;
339 }
340
341 for (i = 1, b = 0; b < nf; b++)
342 {
343 if (bmap[b] == 0)
344 continue;
345 if (v)
346 {
347 bind_array_element (v, ind, fields[b], 0);
348 ind++;
349 }
350 else
351 {
352 if (i == 0)
353 putchar (ops->delim);
354 printf ("%s", fields[b]);
355 }
356 i = 0;
357 }
358 if (v == 0)
359 putchar ('\n');
360
361 return nf;
362}
363
364static int
365cutline (v, line, ops)
366 SHELL_VAR *v;
367 char *line;
368 struct cutop *ops;
369{
370 int rval;
371
372 if (ops->flags & BFLAG)
373 rval = cutbytes (v, line, ops);
374 else if (ops->flags & CFLAG)
375 rval = cutchars (v, line, ops);
376 else
377 rval = cutfields (v, line, ops);
378
379 return (rval >= 0 ? EXECUTION_SUCCESS : EXECUTION_FAILURE);
380}
381
382static int
383cutfile (v, list, ops)
384 SHELL_VAR *v;
385 WORD_LIST *list;
386 struct cutop *ops;
387{
388 int fd, unbuffered_read;
389 char *line, *b;
390 size_t llen;
391 WORD_LIST *l;
392 ssize_t n;
393
394 line = 0;
395 llen = 0;
396
397 l = list;
398 do
399 {
400 /* for each file */
401 if (l == 0 || (l->word->word[0] == '-' && l->word->word[1] == '\0'))
402 fd = 0;
403 else
404 fd = open (l->word->word, O_RDONLY);
405 if (fd < 0)
406 {
407 file_error (l->word->word);
408 return (EXECUTION_FAILURE);
409 }
410
411#ifndef __CYGWIN__
412 unbuffered_read = (lseek (fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE);
413#else
414 unbuffered_read = 1;
415#endif
416
417 while ((n = zgetline (fd, &line, &llen, '\n', unbuffered_read)) != -1)
74091dd4
CR
418 {
419 QUIT;
420 if (line[n] == '\n')
421 line[n] = '\0'; /* cutline expects no newline terminator */
422 cutline (v, line, ops); /* can modify line */
423 }
8868edaf
CR
424 if (fd > 0)
425 close (fd);
426
74091dd4 427 QUIT;
8868edaf
CR
428 if (l)
429 l = l->next;
430 }
431 while (l);
432
433 free (line);
434 return EXECUTION_SUCCESS;
435}
436
437#define OPTSET(x) ((cutflags & (x)) ? 1 : 0)
438
439static int
440cut_internal (which, list)
441 int which; /* not used yet */
442 WORD_LIST *list;
443{
444 int opt, rval, cutflags, delim, npos;
445 char *array_name, *cutstring, *list_arg;
446 SHELL_VAR *v;
447 struct cutop op;
448 struct cutpos *poslist;
449
450 v = 0;
451 rval = EXECUTION_SUCCESS;
452
453 cutflags = 0;
454 array_name = 0;
455 list_arg = 0;
456 delim = '\t';
457
458 reset_internal_getopt ();
459 while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1)
460 {
461 switch (opt)
462 {
463 case 'a':
464 array_name = list_optarg;
465 break;
466 case 'b':
467 cutflags |= BFLAG;
468 list_arg = list_optarg;
469 break;
470 case 'c':
471 cutflags |= CFLAG;
472 list_arg = list_optarg;
473 break;
474 case 'd':
475 cutflags |= DFLAG;
476 delim = list_optarg[0];
477 if (delim == 0 || list_optarg[1])
478 {
479 builtin_error ("delimiter must be a single non-null character");
480 return (EX_USAGE);
481 }
482 break;
483 case 'f':
484 cutflags |= FFLAG;
485 list_arg = list_optarg;
486 break;
487 case 'n':
488 break;
489 case 's':
490 cutflags |= SFLAG;
491 break;
492 CASE_HELPOPT;
493 default:
494 builtin_usage ();
495 return (EX_USAGE);
496 }
497 }
498 list = loptend;
499
500 if (array_name && (legal_identifier (array_name) == 0))
501 {
502 sh_invalidid (array_name);
503 return (EXECUTION_FAILURE);
504 }
505
506 if (list == 0 && which == 0)
507 {
508 builtin_error ("string argument required");
509 return (EX_USAGE);
510 }
511
512 /* options are mutually exclusive and one is required */
513 if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1)
514 {
515 builtin_usage ();
516 return (EX_USAGE);
517 }
518
519 if ((npos = getlist (list_arg, &poslist)) < 0)
520 {
521 free (poslist);
522 return (EXECUTION_FAILURE);
523 }
524
525 if (array_name)
526 {
527 v = find_or_make_array_variable (array_name, 1);
528 if (v == 0 || readonly_p (v) || noassign_p (v))
529 {
530 if (v && readonly_p (v))
531 err_readonly (array_name);
532 return (EXECUTION_FAILURE);
533 }
534 else if (array_p (v) == 0)
535 {
536 builtin_error ("%s: not an indexed array", array_name);
537 return (EXECUTION_FAILURE);
538 }
539 if (invisible_p (v))
540 VUNSETATTR (v, att_invisible);
541 array_flush (array_cell (v));
542 }
543
544 op.flags = cutflags;
545 op.delim = delim;
546 op.npos = npos;
547 op.poslist = poslist;
548
549 /* we implement cut as a builtin with a cutfile() function that opens each
550 filename in LIST as a filename (or `-' for stdin) and runs cutline on
551 every line in the file. */
552 if (which == 0)
553 {
554 cutstring = list->word->word;
555 if (cutstring == 0 || *cutstring == 0)
556 {
557 free (poslist);
558 return (EXECUTION_SUCCESS);
559 }
560 rval = cutline (v, cutstring, &op);
561 }
562 else
563 rval = cutfile (v, list, &op);
564
565 return (rval);
566}
567
568int
569lcut_builtin (list)
570 WORD_LIST *list;
571{
572 return (cut_internal (0, list));
573}
574
575int
576cut_builtin (list)
577 WORD_LIST *list;
578{
579 return (cut_internal (1, list));
580}
581
/* Long documentation for `lcut', one string per output line, terminated
   by a null pointer. */
char *lcut_doc[] = {
  /* summary */
  "Extract selected fields from a string.",
  "",
  /* what is selected and where it goes */
  "Select portions of LINE (as specified by LIST) and assign them to",
  "elements of the indexed array ARRAY starting at index 0, or write",
  "them to the standard output if -a is not specified.",
  "",
  /* LIST syntax */
  "Items specified by LIST are either column positions or fields delimited",
  "by a special character, and are described more completely in cut(1).",
  "",
  /* selection modes */
  "Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
  "field delimiter is specified by -d (default TAB). Column numbering",
  "starts at 1.",
  (char *)NULL
};
597
598struct builtin lcut_struct = {
599 "lcut", /* builtin name */
600 lcut_builtin, /* function implementing the builtin */
601 BUILTIN_ENABLED, /* initial flags for builtin */
602 lcut_doc, /* array of long documentation strings. */
603 "lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */
604 0 /* reserved for internal use */
605};
606
/* Long documentation for `cut', one string per output line, terminated by
   a null pointer. */
char *cut_doc[] = {
  /* summary */
  "Extract selected fields from each line of a file.",
  "",
  /* what is selected and where input comes from */
  "Select portions of each line (as specified by LIST) from each FILE",
  "and write them to the standard output. cut reads from the standard",
  "input if no FILE arguments are specified or if a FILE argument is a",
  "single hyphen.",
  "",
  /* LIST syntax */
  "Items specified by LIST are either column positions or fields delimited",
  "by a special character, and are described more completely in cut(1).",
  "",
  /* selection modes */
  "Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
  "field delimiter is specified by -d (default TAB). Column numbering",
  "starts at 1.",
  (char *)NULL
};
623
624struct builtin cut_struct = {
625 "cut", /* builtin name */
626 cut_builtin, /* function implementing the builtin */
627 BUILTIN_ENABLED, /* initial flags for builtin */
628 cut_doc, /* array of long documentation strings. */
629 "cut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] [file ...]", /* usage synopsis; becomes short_doc */
630 0 /* reserved for internal use */
631};