]>
git.ipfire.org Git - thirdparty/bash.git/blob - examples/loadables/cut.c
49d3547c2475d7772e23438fe4272007a3a42c60
1 /* cut,lcut - extract specified fields from a line and assign them to an array
2 or print them to the standard output */
5 Copyright (C) 2020,2022 Free Software Foundation, Inc.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
21 /* See Makefile for compilation details. */
25 #if defined (HAVE_UNISTD_H)
33 #include "loadables.h"
36 #define CUT_ARRAY_DEFAULT "CUTFIELDS"
38 #define NOPOS -2 /* sentinel for unset startpos/endpos */
42 #define NORANGE -1 /* just a position, no range */
/* Option bits. cut_internal tests them in its local cutflags through OPTSET
   and requires exactly one of BFLAG, CFLAG and FFLAG; cutline reads them
   from ops->flags, sending BFLAG to cutbytes and CFLAG to cutchars;
   cutfields checks SFLAG. */
44 #define BFLAG (1 << 0)
45 #define CFLAG (1 << 1)
46 #define DFLAG (1 << 2)
47 #define FFLAG (1 << 3)
48 #define SFLAG (1 << 4)
52 int startpos
, endpos
; /* zero-based, correction done in getlist() */
60 struct cutpos
*poslist
;
/* Comparison callback handed to qsort by getlist: orders two struct cutpos
   entries by startpos.  The result is the difference of the two start
   positions, so it is negative, zero, or positive as *A sorts before,
   equal to, or after *B. */
64 poscmp (const void *a
, const void *b
)
66 struct cutpos
*p1
, *p2
;
68 p1
= (struct cutpos
*)a
;
69 p2
= (struct cutpos
*)b
;
70 return (p1
->startpos
- p2
->startpos
);
/* Parse ARG, a comma-separated list of positions, into an array of
   struct cutpos.  Each list element is split at `-'; every numeric part
   must be accepted by legal_number, fit in an int, and be greater than
   zero, otherwise "invalid list value" is reported.  Start positions are
   converted from 1-based to 0-based.  The array grows with xrealloc and is
   finally sorted by startpos using poscmp.
   cut_internal treats a negative return value as failure and uses the
   return value as the number of positions.
   NOTE(review): the store through OPP and the return statements are not
   visible here -- confirm against the complete function. */
74 getlist (char *arg
, struct cutpos
**opp
)
76 char *ntok
, *ltok
, *larg
;
79 struct cutpos
*poslist
;
/* one iteration per comma-separated element of the list */
86 while (ltok
= strsep (&larg
, ","))
/* ntok receives the text before the first `-'; strsep advances ltok past it */
91 ntok
= strsep (&ltok
, "-");
96 if (legal_number (ntok
, &num
) == 0 || (int)num
!= num
|| num
<= 0)
98 builtin_error ("%s: invalid list value", ntok
);
103 s
--; /* fields are 1-based */
111 if (legal_number (ltok
, &num
) == 0 || (int)num
!= num
|| num
<= 0)
113 builtin_error ("%s: invalid list value", ltok
);
126 poslist
= (struct cutpos
*)xrealloc (poslist
, nsize
* sizeof (struct cutpos
));
128 poslist
[npos
].startpos
= s
;
129 poslist
[npos
].endpos
= e
;
134 builtin_error ("missing list of positions");
/* order the collected ranges by ascending start position */
139 qsort (poslist
, npos
, sizeof(poslist
[0]), poscmp
);
146 cutbytes (SHELL_VAR
*v
, char *line
, struct cutop
*ops
)
153 llen
= strlen (line
);
154 buf
= xmalloc (llen
+ 1);
155 bmap
= xmalloc (llen
+ 1);
156 memset (bmap
, 0, llen
);
158 for (n
= 0; n
< ops
->npos
; n
++)
160 s
= ops
->poslist
[n
].startpos
; /* no translation needed yet */
161 e
= ops
->poslist
[n
].endpos
;
164 else if (e
== EOL
|| e
>= llen
)
166 /* even if a column is specified multiple times, it will only be printed
168 for (i
= s
; i
<= e
; i
++)
173 for (i
= 0; i
< llen
; i
++)
181 bind_array_element (v
, ind
, buf
, 0);
185 printf ("%s\n", buf
);
194 cutchars (SHELL_VAR
*v
, char *line
, struct cutop
*ops
)
203 return (cutbytes (v
, line
, ops
));
204 if (locale_utf8locale
&& utf8_mbsmbchar (line
) == 0)
205 return (cutbytes (v
, line
, ops
));
207 llen
= strlen (line
);
208 wbuf
= (wchar_t *)xmalloc ((llen
+ 1) * sizeof (wchar_t));
210 wlen
= mbstowcs (wbuf
, line
, llen
);
211 if (MB_INVALIDCH (wlen
))
214 return (cutbytes (v
, line
, ops
));
217 bmap
= xmalloc (llen
+ 1);
218 memset (bmap
, 0, llen
);
220 for (n
= 0; n
< ops
->npos
; n
++)
222 s
= ops
->poslist
[n
].startpos
; /* no translation needed yet */
223 e
= ops
->poslist
[n
].endpos
;
226 else if (e
== EOL
|| e
>= wlen
)
228 /* even if a column is specified multiple times, it will only be printed
230 for (i
= s
; i
<= e
; i
++)
234 wb2
= (wchar_t *)xmalloc ((wlen
+ 1) * sizeof (wchar_t));
236 for (i
= 0; i
< wlen
; i
++)
244 n
= wcstombs (buf
, wb2
, llen
);
249 bind_array_element (v
, ind
, buf
, 0);
253 printf ("%s\n", buf
);
261 /* The basic strategy is to cut the line into fields using strsep, populate
262 an array of fields from 0..nf, then select those fields using the same
263 bitmap approach as cut{bytes,chars} and assign them to the array variable
264 V or print them on stdout. This function obeys SFLAG. */
266 cutfields (SHELL_VAR
*v
, char *line
, struct cutop
*ops
)
269 char *buf
, *bmap
, *field
, **fields
, delim
[2];
271 int i
, b
, n
, s
, e
, nf
;
275 delim
[0] = ops
->delim
;
285 field
= strsep (&buf
, delim
); /* destructive */
289 fields
= xrealloc (fields
, fsize
* sizeof (char *));
300 if (ops
->flags
& SFLAG
)
304 bind_array_element (v
, ind
, line
, 0);
308 printf ("%s\n", line
);
312 bmap
= xmalloc (nf
+ 1);
313 memset (bmap
, 0, nf
);
315 for (n
= 0; n
< ops
->npos
; n
++)
317 s
= ops
->poslist
[n
].startpos
; /* no translation needed yet */
318 e
= ops
->poslist
[n
].endpos
;
321 else if (e
== EOL
|| e
>= nf
)
323 /* even if a column is specified multiple times, it will only be printed
325 for (i
= s
; i
<= e
; i
++)
329 for (i
= 1, b
= 0; b
< nf
; b
++)
335 bind_array_element (v
, ind
, fields
[b
], 0);
341 putchar (ops
->delim
);
342 printf ("%s", fields
[b
]);
/* Cut one LINE according to OPS, dispatching on ops->flags: BFLAG calls
   cutbytes, CFLAG calls cutchars, and the remaining branch calls cutfields.
   V is passed through unchanged to the selected function.
   Returns EXECUTION_SUCCESS when the selected function returned a value
   >= 0 and EXECUTION_FAILURE otherwise. */
353 cutline (SHELL_VAR
*v
, char *line
, struct cutop
*ops
)
357 if (ops
->flags
& BFLAG
)
358 rval
= cutbytes (v
, line
, ops
);
359 else if (ops
->flags
& CFLAG
)
360 rval
= cutchars (v
, line
, ops
);
362 rval
= cutfields (v
, line
, ops
);
364 return (rval
>= 0 ? EXECUTION_SUCCESS
: EXECUTION_FAILURE
);
/* Run cutline on every line read from the files named in LIST.
   A null list entry or the word "-" is treated specially (presumably
   standard input -- the assignment is not visible here); any other word is
   opened read-only.  After a failed open, file_error is called with the
   file name and EXECUTION_FAILURE is returned.
   Lines are read with zgetline using '\n' as the delimiter until it
   returns -1.  The value returned by cutline for each line is not used;
   the final visible return is EXECUTION_SUCCESS. */
368 cutfile (SHELL_VAR
*v
, WORD_LIST
*list
, struct cutop
*ops
)
370 int fd
, unbuffered_read
;
383 if (l
== 0 || (l
->word
->word
[0] == '-' && l
->word
->word
[1] == '\0'))
386 fd
= open (l
->word
->word
, O_RDONLY
);
389 file_error (l
->word
->word
);
390 return (EXECUTION_FAILURE
);
/* true when FD cannot seek (lseek fails with ESPIPE); passed on to zgetline */
394 unbuffered_read
= (lseek (fd
, 0L, SEEK_CUR
) < 0) && (errno
== ESPIPE
);
399 while ((n
= zgetline (fd
, &line
, &llen
, '\n', unbuffered_read
)) != -1)
403 line
[n
] = '\0'; /* cutline expects no newline terminator */
404 cutline (v
, line
, ops
); /* can modify line */
416 return EXECUTION_SUCCESS
;
/* Evaluates to 1 if any bit of X is set in the variable `cutflags' that is
   in scope where the macro is used, and to 0 otherwise. */
419 #define OPTSET(x) ((cutflags & (x)) ? 1 : 0)
/* Common implementation of the two builtins: lcut_builtin calls this with
   WHICH == 0 and cut_builtin with WHICH == 1.
   Options are parsed with internal_getopt using "a:b:c:d:f:sn".  The
   visible option handling stores an option argument in array_name, stores
   an option argument in list_arg (three places), and takes the delimiter
   from the first character of an option argument, rejecting an empty or
   multi-character value.
   Visible checks after option parsing:
     - a supplied array name must satisfy legal_identifier, otherwise
       sh_invalidid is called and EXECUTION_FAILURE is returned;
     - with WHICH == 0 a missing LIST yields "string argument required";
     - exactly one of BFLAG, CFLAG and FFLAG must be set;
     - a negative result from getlist returns EXECUTION_FAILURE.
   The parsed positions are stored in op.poslist.  One path runs cutline on
   the first word of LIST (returning EXECUTION_SUCCESS early when that word
   is null or empty); the other runs cutfile on LIST.
   NOTE(review): the test selecting between the cutline and cutfile paths
   is not visible here; presumably it is WHICH -- confirm. */
422 cut_internal (int which
, WORD_LIST
*list
)
424 int opt
, rval
, cutflags
, delim
, npos
;
425 char *array_name
, *cutstring
, *list_arg
;
428 struct cutpos
*poslist
;
431 rval
= EXECUTION_SUCCESS
;
438 reset_internal_getopt ();
439 while ((opt
= internal_getopt (list
, "a:b:c:d:f:sn")) != -1)
444 array_name
= list_optarg
;
448 list_arg
= list_optarg
;
452 list_arg
= list_optarg
;
456 delim
= list_optarg
[0];
457 if (delim
== 0 || list_optarg
[1])
459 builtin_error ("delimiter must be a single non-null character");
465 list_arg
= list_optarg
;
480 if (array_name
&& (legal_identifier (array_name
) == 0))
482 sh_invalidid (array_name
);
483 return (EXECUTION_FAILURE
);
486 if (list
== 0 && which
== 0)
488 builtin_error ("string argument required");
492 /* options are mutually exclusive and one is required */
493 if ((OPTSET (BFLAG
) + OPTSET (CFLAG
) + OPTSET (FFLAG
)) != 1)
499 if ((npos
= getlist (list_arg
, &poslist
)) < 0)
502 return (EXECUTION_FAILURE
);
507 v
= builtin_find_indexed_array (array_name
, 1);
511 return (EXECUTION_FAILURE
);
518 op
.poslist
= poslist
;
520 /* we implement cut as a builtin with a cutfile() function that opens each
521 filename in LIST as a filename (or `-' for stdin) and runs cutline on
522 every line in the file. */
525 cutstring
= list
->word
->word
;
526 if (cutstring
== 0 || *cutstring
== 0)
529 return (EXECUTION_SUCCESS
);
531 rval
= cutline (v
, cutstring
, &op
);
534 rval
= cutfile (v
, list
, &op
);
/* Entry point registered in lcut_struct for the `lcut' builtin: forwards
   LIST to cut_internal with WHICH == 0 and returns its result. */
541 lcut_builtin (WORD_LIST
*list
)
543 return (cut_internal (0, list
));
/* Entry point registered in cut_struct for the `cut' builtin: forwards
   LIST to cut_internal with WHICH == 1 and returns its result. */
547 cut_builtin (WORD_LIST
*list
)
549 return (cut_internal (1, list
));
553 "Extract selected fields from a string.",
555 "Select portions of LINE (as specified by LIST) and assign them to",
556 "elements of the indexed array ARRAY starting at index 0, or write",
557 "them to the standard output if -a is not specified.",
559 "Items specified by LIST are either column positions or fields delimited",
560 "by a special character, and are described more completely in cut(1).",
562 "Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
563 "field delimiter is specified by -d (default TAB). Column numbering",
/* Builtin description record for `lcut': ties the name to lcut_builtin,
   the long documentation in lcut_doc, and the usage synopsis. */
568 struct builtin lcut_struct
= {
569 "lcut", /* builtin name */
570 lcut_builtin
, /* function implementing the builtin */
571 BUILTIN_ENABLED
, /* initial flags for builtin */
572 lcut_doc
, /* array of long documentation strings. */
573 "lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */
574 0 /* reserved for internal use */
578 "Extract selected fields from each line of a file.",
580 "Select portions of each line (as specified by LIST) from each FILE",
581 "and write them to the standard output. cut reads from the standard",
582 "input if no FILE arguments are specified or if a FILE argument is a",
585 "Items specified by LIST are either column positions or fields delimited",
586 "by a special character, and are described more completely in cut(1).",
588 "Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
589 "field delimiter is specified by -d (default TAB). Column numbering",
/* Builtin description record for `cut': ties the name to cut_builtin,
   the long documentation in cut_doc, and the usage synopsis. */
594 struct builtin cut_struct
= {
595 "cut", /* builtin name */
596 cut_builtin
, /* function implementing the builtin */
597 BUILTIN_ENABLED
, /* initial flags for builtin */
598 cut_doc
, /* array of long documentation strings. */
599 "cut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] [file ...]", /* usage synopsis; becomes short_doc */
600 0 /* reserved for internal use */