]> git.ipfire.org Git - thirdparty/bash.git/blob - examples/loadables/cut.c
49d3547c2475d7772e23438fe4272007a3a42c60
[thirdparty/bash.git] / examples / loadables / cut.c
1 /* cut,lcut - extract specified fields from a line and assign them to an array
2 or print them to the standard output */
3
4 /*
5 Copyright (C) 2020,2022 Free Software Foundation, Inc.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 /* See Makefile for compilation details. */
22
23 #include <config.h>
24
25 #if defined (HAVE_UNISTD_H)
26 # include <unistd.h>
27 #endif
28 #include "bashansi.h"
29 #include <stdio.h>
30 #include <fcntl.h>
31 #include <errno.h>
32
33 #include "loadables.h"
34 #include "shmbutil.h"
35
/* Default array name.  Nothing in the code visible in this file refers to
   it. */
#define CUT_ARRAY_DEFAULT	"CUTFIELDS"

/* Sentinel for an unset startpos/endpos.  Not referenced by the code visible
   in this file. */
#define NOPOS		(-2)

/* Special values getlist() stores in a struct cutpos. */
#define BOL		0		/* startpos: item began with `-' */
#define EOL		INT_MAX		/* endpos: item ended with `-' */
#define NORANGE		(-1)		/* endpos: just a position, no range */

/* Bits of the flags word built by cut_internal(), one per option. */
#define BFLAG		0x01		/* -b */
#define CFLAG		0x02		/* -c */
#define DFLAG		0x04		/* -d */
#define FFLAG		0x08		/* -f */
#define SFLAG		0x10		/* -s */
49
/* One item of a position list.  Both members are zero-based; getlist()
   converts from the 1-based values the user supplies. */
struct cutpos
{
  int startpos;		/* first selected position, or BOL */
  int endpos;		/* last selected position, EOL, or NORANGE */
};
54
/* Everything a cut operation needs, assembled by cut_internal() and handed
   to the cutline()/cutfile() family. */
struct cutop
{
  int flags;			/* BFLAG|CFLAG|DFLAG|FFLAG|SFLAG bits */
  int delim;			/* field delimiter character */
  int npos;			/* number of entries in POSLIST */
  struct cutpos *poslist;	/* position list produced by getlist() */
};
62
63 static int
64 poscmp (const void *a, const void *b)
65 {
66 struct cutpos *p1, *p2;
67
68 p1 = (struct cutpos *)a;
69 p2 = (struct cutpos *)b;
70 return (p1->startpos - p2->startpos);
71 }
72
73 static int
74 getlist (char *arg, struct cutpos **opp)
75 {
76 char *ntok, *ltok, *larg;
77 int s, e;
78 intmax_t num;
79 struct cutpos *poslist;
80 int npos, nsize;
81
82 poslist = 0;
83 nsize = npos = 0;
84 s = e = 0;
85 larg = arg;
86 while (ltok = strsep (&larg, ","))
87 {
88 if (*ltok == 0)
89 continue;
90
91 ntok = strsep (&ltok, "-");
92 if (*ntok == 0)
93 s = BOL;
94 else
95 {
96 if (legal_number (ntok, &num) == 0 || (int)num != num || num <= 0)
97 {
98 builtin_error ("%s: invalid list value", ntok);
99 *opp = poslist;
100 return -1;
101 }
102 s = num;
103 s--; /* fields are 1-based */
104 }
105 if (ltok == 0)
106 e = NORANGE;
107 else if (*ltok == 0)
108 e = EOL;
109 else
110 {
111 if (legal_number (ltok, &num) == 0 || (int)num != num || num <= 0)
112 {
113 builtin_error ("%s: invalid list value", ltok);
114 *opp = poslist;
115 return -1;
116 }
117 e = num;
118 e--;
119 if (e == s)
120 e = NORANGE;
121 }
122
123 if (npos == nsize)
124 {
125 nsize += 4;
126 poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof (struct cutpos));
127 }
128 poslist[npos].startpos = s;
129 poslist[npos].endpos = e;
130 npos++;
131 }
132 if (npos == 0)
133 {
134 builtin_error ("missing list of positions");
135 *opp = poslist;
136 return -1;
137 }
138
139 qsort (poslist, npos, sizeof(poslist[0]), poscmp);
140 *opp = poslist;
141
142 return npos;
143 }
144
145 static int
146 cutbytes (SHELL_VAR *v, char *line, struct cutop *ops)
147 {
148 arrayind_t ind;
149 char *buf, *bmap;
150 size_t llen;
151 int i, b, n, s, e;
152
153 llen = strlen (line);
154 buf = xmalloc (llen + 1);
155 bmap = xmalloc (llen + 1);
156 memset (bmap, 0, llen);
157
158 for (n = 0; n < ops->npos; n++)
159 {
160 s = ops->poslist[n].startpos; /* no translation needed yet */
161 e = ops->poslist[n].endpos;
162 if (e == NORANGE)
163 e = s;
164 else if (e == EOL || e >= llen)
165 e = llen - 1;
166 /* even if a column is specified multiple times, it will only be printed
167 once */
168 for (i = s; i <= e; i++)
169 bmap[i] = 1;
170 }
171
172 b = 0;
173 for (i = 0; i < llen; i++)
174 if (bmap[i])
175 buf[b++] = line[i];
176 buf[b] = 0;
177
178 if (v)
179 {
180 ind = 0;
181 bind_array_element (v, ind, buf, 0);
182 ind++;
183 }
184 else
185 printf ("%s\n", buf);
186
187 free (buf);
188 free (bmap);
189
190 return ind;
191 }
192
193 static int
194 cutchars (SHELL_VAR *v, char *line, struct cutop *ops)
195 {
196 arrayind_t ind;
197 char *buf, *bmap;
198 wchar_t *wbuf, *wb2;
199 size_t llen, wlen;
200 int i, b, n, s, e;
201
202 if (MB_CUR_MAX == 1)
203 return (cutbytes (v, line, ops));
204 if (locale_utf8locale && utf8_mbsmbchar (line) == 0)
205 return (cutbytes (v, line, ops));
206
207 llen = strlen (line);
208 wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t));
209
210 wlen = mbstowcs (wbuf, line, llen);
211 if (MB_INVALIDCH (wlen))
212 {
213 free (wbuf);
214 return (cutbytes (v, line, ops));
215 }
216
217 bmap = xmalloc (llen + 1);
218 memset (bmap, 0, llen);
219
220 for (n = 0; n < ops->npos; n++)
221 {
222 s = ops->poslist[n].startpos; /* no translation needed yet */
223 e = ops->poslist[n].endpos;
224 if (e == NORANGE)
225 e = s;
226 else if (e == EOL || e >= wlen)
227 e = wlen - 1;
228 /* even if a column is specified multiple times, it will only be printed
229 once */
230 for (i = s; i <= e; i++)
231 bmap[i] = 1;
232 }
233
234 wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t));
235 b = 0;
236 for (i = 0; i < wlen; i++)
237 if (bmap[i])
238 wb2[b++] = wbuf[i];
239 wb2[b] = 0;
240
241 free (wbuf);
242
243 buf = bmap;
244 n = wcstombs (buf, wb2, llen);
245
246 if (v)
247 {
248 ind = 0;
249 bind_array_element (v, ind, buf, 0);
250 ind++;
251 }
252 else
253 printf ("%s\n", buf);
254
255 free (buf);
256 free (wb2);
257
258 return ind;
259 }
260
261 /* The basic strategy is to cut the line into fields using strsep, populate
262 an array of fields from 0..nf, then select those fields using the same
263 bitmap approach as cut{bytes,chars} and assign them to the array variable
264 V or print them on stdout. This function obeys SFLAG. */
265 static int
266 cutfields (SHELL_VAR *v, char *line, struct cutop *ops)
267 {
268 arrayind_t ind;
269 char *buf, *bmap, *field, **fields, delim[2];
270 size_t llen, fsize;
271 int i, b, n, s, e, nf;
272
273 ind = 0;
274
275 delim[0] = ops->delim;
276 delim[1] = '\0';
277
278 fields = 0;
279 nf = 0;
280 fsize = 0;
281
282 field = buf = line;
283 do
284 {
285 field = strsep (&buf, delim); /* destructive */
286 if (nf == fsize)
287 {
288 fsize += 8;
289 fields = xrealloc (fields, fsize * sizeof (char *));
290 }
291 fields[nf] = field;
292 if (field)
293 nf++;
294 }
295 while (field);
296
297 if (nf == 1)
298 {
299 free (fields);
300 if (ops->flags & SFLAG)
301 return ind;
302 if (v)
303 {
304 bind_array_element (v, ind, line, 0);
305 ind++;
306 }
307 else
308 printf ("%s\n", line);
309 return ind;
310 }
311
312 bmap = xmalloc (nf + 1);
313 memset (bmap, 0, nf);
314
315 for (n = 0; n < ops->npos; n++)
316 {
317 s = ops->poslist[n].startpos; /* no translation needed yet */
318 e = ops->poslist[n].endpos;
319 if (e == NORANGE)
320 e = s;
321 else if (e == EOL || e >= nf)
322 e = nf - 1;
323 /* even if a column is specified multiple times, it will only be printed
324 once */
325 for (i = s; i <= e; i++)
326 bmap[i] = 1;
327 }
328
329 for (i = 1, b = 0; b < nf; b++)
330 {
331 if (bmap[b] == 0)
332 continue;
333 if (v)
334 {
335 bind_array_element (v, ind, fields[b], 0);
336 ind++;
337 }
338 else
339 {
340 if (i == 0)
341 putchar (ops->delim);
342 printf ("%s", fields[b]);
343 }
344 i = 0;
345 }
346 if (v == 0)
347 putchar ('\n');
348
349 return nf;
350 }
351
352 static int
353 cutline (SHELL_VAR *v, char *line, struct cutop *ops)
354 {
355 int rval;
356
357 if (ops->flags & BFLAG)
358 rval = cutbytes (v, line, ops);
359 else if (ops->flags & CFLAG)
360 rval = cutchars (v, line, ops);
361 else
362 rval = cutfields (v, line, ops);
363
364 return (rval >= 0 ? EXECUTION_SUCCESS : EXECUTION_FAILURE);
365 }
366
367 static int
368 cutfile (SHELL_VAR *v, WORD_LIST *list, struct cutop *ops)
369 {
370 int fd, unbuffered_read;
371 char *line, *b;
372 size_t llen;
373 WORD_LIST *l;
374 ssize_t n;
375
376 line = 0;
377 llen = 0;
378
379 l = list;
380 do
381 {
382 /* for each file */
383 if (l == 0 || (l->word->word[0] == '-' && l->word->word[1] == '\0'))
384 fd = 0;
385 else
386 fd = open (l->word->word, O_RDONLY);
387 if (fd < 0)
388 {
389 file_error (l->word->word);
390 return (EXECUTION_FAILURE);
391 }
392
393 #ifndef __CYGWIN__
394 unbuffered_read = (lseek (fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE);
395 #else
396 unbuffered_read = 1;
397 #endif
398
399 while ((n = zgetline (fd, &line, &llen, '\n', unbuffered_read)) != -1)
400 {
401 QUIT;
402 if (line[n] == '\n')
403 line[n] = '\0'; /* cutline expects no newline terminator */
404 cutline (v, line, ops); /* can modify line */
405 }
406 if (fd > 0)
407 close (fd);
408
409 QUIT;
410 if (l)
411 l = l->next;
412 }
413 while (l);
414
415 free (line);
416 return EXECUTION_SUCCESS;
417 }
418
419 #define OPTSET(x) ((cutflags & (x)) ? 1 : 0)
420
421 static int
422 cut_internal (int which, WORD_LIST *list)
423 {
424 int opt, rval, cutflags, delim, npos;
425 char *array_name, *cutstring, *list_arg;
426 SHELL_VAR *v;
427 struct cutop op;
428 struct cutpos *poslist;
429
430 v = 0;
431 rval = EXECUTION_SUCCESS;
432
433 cutflags = 0;
434 array_name = 0;
435 list_arg = 0;
436 delim = '\t';
437
438 reset_internal_getopt ();
439 while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1)
440 {
441 switch (opt)
442 {
443 case 'a':
444 array_name = list_optarg;
445 break;
446 case 'b':
447 cutflags |= BFLAG;
448 list_arg = list_optarg;
449 break;
450 case 'c':
451 cutflags |= CFLAG;
452 list_arg = list_optarg;
453 break;
454 case 'd':
455 cutflags |= DFLAG;
456 delim = list_optarg[0];
457 if (delim == 0 || list_optarg[1])
458 {
459 builtin_error ("delimiter must be a single non-null character");
460 return (EX_USAGE);
461 }
462 break;
463 case 'f':
464 cutflags |= FFLAG;
465 list_arg = list_optarg;
466 break;
467 case 'n':
468 break;
469 case 's':
470 cutflags |= SFLAG;
471 break;
472 CASE_HELPOPT;
473 default:
474 builtin_usage ();
475 return (EX_USAGE);
476 }
477 }
478 list = loptend;
479
480 if (array_name && (legal_identifier (array_name) == 0))
481 {
482 sh_invalidid (array_name);
483 return (EXECUTION_FAILURE);
484 }
485
486 if (list == 0 && which == 0)
487 {
488 builtin_error ("string argument required");
489 return (EX_USAGE);
490 }
491
492 /* options are mutually exclusive and one is required */
493 if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1)
494 {
495 builtin_usage ();
496 return (EX_USAGE);
497 }
498
499 if ((npos = getlist (list_arg, &poslist)) < 0)
500 {
501 free (poslist);
502 return (EXECUTION_FAILURE);
503 }
504
505 if (array_name)
506 {
507 v = builtin_find_indexed_array (array_name, 1);
508 if (v == 0)
509 {
510 free (poslist);
511 return (EXECUTION_FAILURE);
512 }
513 }
514
515 op.flags = cutflags;
516 op.delim = delim;
517 op.npos = npos;
518 op.poslist = poslist;
519
520 /* we implement cut as a builtin with a cutfile() function that opens each
521 filename in LIST as a filename (or `-' for stdin) and runs cutline on
522 every line in the file. */
523 if (which == 0)
524 {
525 cutstring = list->word->word;
526 if (cutstring == 0 || *cutstring == 0)
527 {
528 free (poslist);
529 return (EXECUTION_SUCCESS);
530 }
531 rval = cutline (v, cutstring, &op);
532 }
533 else
534 rval = cutfile (v, list, &op);
535
536 free (poslist);
537 return (rval);
538 }
539
540 int
541 lcut_builtin (WORD_LIST *list)
542 {
543 return (cut_internal (0, list));
544 }
545
546 int
547 cut_builtin (WORD_LIST *list)
548 {
549 return (cut_internal (1, list));
550 }
551
/* Long documentation for `lcut', one output line per element, terminated by
   a null pointer.  Referenced from lcut_struct. */
char *lcut_doc[] = {
	/* summary line */
	"Extract selected fields from a string.",
	"",
	/* what is selected and where it goes */
	"Select portions of LINE (as specified by LIST) and assign them to",
	"elements of the indexed array ARRAY starting at index 0, or write",
	"them to the standard output if -a is not specified.",
	"",
	/* LIST syntax */
	"Items specified by LIST are either column positions or fields delimited",
	"by a special character, and are described more completely in cut(1).",
	"",
	/* meaning of -b, -c, -f and -d */
	"Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
	"field delimiter is specified by -d (default TAB). Column numbering",
	"starts at 1.",
	(char *)NULL
};
567
568 struct builtin lcut_struct = {
569 "lcut", /* builtin name */
570 lcut_builtin, /* function implementing the builtin */
571 BUILTIN_ENABLED, /* initial flags for builtin */
572 lcut_doc, /* array of long documentation strings. */
573 "lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */
574 0 /* reserved for internal use */
575 };
576
/* Long documentation for `cut', one output line per element, terminated by a
   null pointer.  Referenced from cut_struct. */
char *cut_doc[] = {
	/* summary line */
	"Extract selected fields from each line of a file.",
	"",
	/* input files and output destination */
	"Select portions of each line (as specified by LIST) from each FILE",
	"and write them to the standard output. cut reads from the standard",
	"input if no FILE arguments are specified or if a FILE argument is a",
	"single hyphen.",
	"",
	/* LIST syntax */
	"Items specified by LIST are either column positions or fields delimited",
	"by a special character, and are described more completely in cut(1).",
	"",
	/* meaning of -b, -c, -f and -d */
	"Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
	"field delimiter is specified by -d (default TAB). Column numbering",
	"starts at 1.",
	(char *)NULL
};
593
594 struct builtin cut_struct = {
595 "cut", /* builtin name */
596 cut_builtin, /* function implementing the builtin */
597 BUILTIN_ENABLED, /* initial flags for builtin */
598 cut_doc, /* array of long documentation strings. */
599 "cut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] [file ...]", /* usage synopsis; becomes short_doc */
600 0 /* reserved for internal use */
601 };