- rl_redisplay: if the line consumes more than a screen's worth of
lines, mark the lines that are off the top of the display as having
a `normal' face
+
+ 4/14
+ ----
+variables.c
+ - LINENO: no longer has the integer attribute, since it does not take
+ arithmetic expressions on assignment (use $((expr)) instead)
+
+lib/readline/mbutil.c
+ - _rl_find_prev_utf8char: new function, uses the structure of UTF-8
+ characters to find the start of the previous one no matter where we
+ start: at the beginning of the next character, at the end of a
+ multibyte character, or in the middle of a multibyte character.
+ EXPERIMENTAL
+ - _rl_find_prev_mbchar_internal: for now, call _rl_find_prev_utf8char
+ if we are in a UTF-8 locale. EXPERIMENTAL
+
+ 4/15
+ ----
+lib/readline/mbutil.c
+ - _rl_utf8_mblen: changes to handle invalid multibyte sequences better,
+ even when N means that the sequence would be too short anyway
+
+examples/loadables/csv.c
+ - csv: new loadable builtin to parse a line of csv data and store it
+ into an array supplied as an argument
+
+ 4/16
+ ----
+examples/loadables/cut.c
+ - cut: new loadable builtin
+ - lcut: new builtin that does what cut does but on a string supplied
+ as an argument (only one string for now)
+
examples/loadables/fdflags.c f
examples/loadables/finfo.c f
examples/loadables/cat.c f
-#examples/loadables/cut.c f
+examples/loadables/csv.c f
+examples/loadables/cut.c f
examples/loadables/logname.c f
examples/loadables/basename.c f
examples/loadables/dirname.c f
ALLPROG = print truefalse sleep finfo logname basename dirname fdflags \
tty pathchk tee head mkdir rmdir mkfifo mktemp printenv id whoami \
uname sync push ln unlink realpath strftime mypid setpgid seq rm \
- accept
+ accept csv cut
OTHERPROG = necho hello cat pushd stat
all: $(SHOBJ_STATUS)
realpath: realpath.o
$(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ realpath.o $(SHOBJ_LIBS)
+csv: csv.o
+ $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ csv.o $(SHOBJ_LIBS)
+
+cut: cut.o
+ $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ cut.o $(SHOBJ_LIBS)
+
strftime: strftime.o
$(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ strftime.o $(SHOBJ_LIBS)
necho.o: necho.c
hello.o: hello.c
cat.o: cat.c
+csv.o: csv.c
+cut.o: cut.c
printenv.o: printenv.c
id.o: id.c
whoami.o: whoami.c
--- /dev/null
+/* csv - process a line of csv data and populate an indexed array with the
+ fields */
+
+/*
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* See Makefile for compilation details. */
+
+#include <config.h>
+
+#if defined (HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#include "bashansi.h"
+#include <stdio.h>
+
+#include "loadables.h"
+
+#define CSV_ARRAY_DEFAULT "CSV"
+
+#define NQUOTE 0
+#define DQUOTE 1
+
+/* Break LINE at commas and assign each field to consecutive elements of the
+   array variable CSV, beginning with index 0.  A field that starts with a
+   double quote is copied into a scratch buffer with its enclosing quotes
+   removed and each doubled quote reduced to one; an unquoted field is bound
+   directly out of LINE.  LINE is temporarily modified while each field is
+   bound, then restored.  The format of LINE is as described in RFC 4180.
+   Returns the number of fields assigned. */
+static int
+csvsplit (csv, line)
+     SHELL_VAR *csv;
+     char *line;
+{
+  arrayind_t nfields;
+  char *start, *end, *value, *scratch;
+  int sep, in_quotes, len;
+
+  scratch = 0;
+  nfields = 0;
+  start = line;
+
+  for (;;)
+    {
+      if (*start != '"')
+        {
+          /* Unquoted field: everything up to the next comma or the end. */
+          value = start;
+          end = start + strcspn (start, ",");
+        }
+      else
+        {
+          /* Allocated once, at the first quoted field; later quoted fields
+             start further into LINE, so they cannot be longer. */
+          if (scratch == 0)
+            scratch = xmalloc (strlen (start) + 1);
+          value = scratch;
+          len = 0;
+          in_quotes = DQUOTE;
+          for (end = ++start; *end; end++)
+            {
+              if (in_quotes == DQUOTE && *end == '"')
+                {
+                  if (end[1] == '"')
+                    value[len++] = *end++;	/* skip double quote */
+                  else
+                    in_quotes = NQUOTE;		/* closing quote */
+                }
+              else if (in_quotes == NQUOTE && *end == ',')
+                break;
+              else
+                /* This copies any text between a closing double quote and
+                   the delimiter.  If you want to change that, make sure to
+                   do the copy only if in_quotes == DQUOTE. */
+                value[len++] = *end;
+            }
+          value[len] = '\0';
+        }
+
+      /* Terminate the field in place for the duration of the assignment. */
+      sep = *end;
+      *end = '\0';
+
+      bind_array_element (csv, nfields, value, 0);
+      nfields++;
+
+      *end = sep;
+
+      if (sep != ',')
+        break;
+      start = end + 1;
+    }
+
+  free (scratch);
+
+  return (nfields);	/* number of fields */
+}
+
+/* The `csv' builtin.  Accepts -a ARRAY to name the destination array
+   (default CSV_ARRAY_DEFAULT), then splits the first remaining argument
+   into that array after emptying it.  Returns EX_USAGE for option errors or
+   a missing string argument, EXECUTION_FAILURE if the array name or
+   variable is unusable, and EXECUTION_SUCCESS otherwise. */
+int
+csv_builtin (list)
+     WORD_LIST *list;
+{
+  int opt, nfields;
+  char *array_name, *csvstring;
+  SHELL_VAR *var;
+
+  array_name = CSV_ARRAY_DEFAULT;
+
+  reset_internal_getopt ();
+  while ((opt = internal_getopt (list, "a:")) != -1)
+    {
+      switch (opt)
+        {
+        case 'a':
+          array_name = list_optarg;
+          break;
+        CASE_HELPOPT;
+        default:
+          builtin_usage ();
+          return (EX_USAGE);
+        }
+    }
+  list = loptend;
+
+  if (legal_identifier (array_name) == 0)
+    {
+      sh_invalidid (array_name);
+      return (EXECUTION_FAILURE);
+    }
+
+  if (list == 0)
+    {
+      builtin_error ("csv string argument required");
+      return (EX_USAGE);
+    }
+
+  var = find_or_make_array_variable (array_name, 1);
+  if (var == 0)
+    return (EXECUTION_FAILURE);
+  if (readonly_p (var))
+    {
+      err_readonly (array_name);
+      return (EXECUTION_FAILURE);
+    }
+  if (noassign_p (var))
+    return (EXECUTION_FAILURE);
+  if (array_p (var) == 0)
+    {
+      builtin_error ("%s: not an indexed array", array_name);
+      return (EXECUTION_FAILURE);
+    }
+
+  if (invisible_p (var))
+    VUNSETATTR (var, att_invisible);
+  /* Discard any existing elements before storing the new fields. */
+  array_flush (array_cell (var));
+
+  csvstring = list->word->word;
+  if (csvstring == 0 || *csvstring == 0)
+    return (EXECUTION_SUCCESS);
+
+  nfields = csvsplit (var, csvstring);
+  /* Maybe do something with NFIELDS here, it's the number of fields */
+
+  return (EXECUTION_SUCCESS);
+}
+
+/* Called when builtin is enabled and loaded from the shared object. If this
+ function returns 0, the load fails. */
+/* NAME is not used; this implementation has nothing to set up and always
+   returns 1. */
+int
+csv_builtin_load (name)
+ char *name;
+{
+ return (1);
+}
+
+/* Called when builtin is disabled. */
+/* NAME is not used; there is nothing to release, so the body is empty. */
+void
+csv_builtin_unload (name)
+ char *name;
+{
+}
+
+char *csv_doc[] = {
+ "Read comma-separated fields from a string.",
+ "",
+ "Parse STRING, a line of comma-separated values, into individual fields,",
+ "and store them into the indexed array ARRAYNAME starting at index 0.",
+ "If ARRAYNAME is not supplied, \"CSV\" is the default array name.",
+ (char *)NULL
+};
+
+struct builtin csv_struct = {
+ "csv", /* builtin name */
+ csv_builtin, /* function implementing the builtin */
+ BUILTIN_ENABLED, /* initial flags for builtin */
+ csv_doc, /* array of long documentation strings. */
+ "csv [-a ARRAY] string", /* usage synopsis; becomes short_doc */
+ 0 /* reserved for internal use */
+};
--- /dev/null
+/* lcut - extract specified fields from a line and assign them to an array or
+ print them to the standard output */
+
+/*
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* See Makefile for compilation details. */
+
+#include <config.h>
+
+#if defined (HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#include "bashansi.h"
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "loadables.h"
+#include "shmbutil.h"
+
+#define CUT_ARRAY_DEFAULT "CUTFIELDS"
+
+#define NOPOS -2 /* sentinel for unset startpos/endpos */
+
+#define BOL 0
+#define EOL INT_MAX
+#define NORANGE -1 /* just a position, no range */
+
+#define BFLAG (1 << 0)
+#define CFLAG (1 << 1)
+#define DFLAG (1 << 2)
+#define FFLAG (1 << 3)
+#define SFLAG (1 << 4)
+
+/* One entry of a parsed position list, as built by getlist().  ENDPOS is
+   NORANGE for a single position and EOL for an open-ended range (`N-'). */
+struct cutpos
+{
+ int startpos, endpos; /* zero-based, correction done in getlist() */
+};
+
+/* Describes one cut operation; filled in by cut_internal() and passed to
+   the per-line functions.
+     flags    bitwise OR of BFLAG, CFLAG, DFLAG, FFLAG, SFLAG
+     delim    field delimiter character used by cutfields()
+     npos     number of entries in POSLIST
+     poslist  position list returned by getlist() */
+struct cutop
+{
+ int flags;
+ int delim;
+ int npos;
+ struct cutpos *poslist;
+};
+
+/* qsort comparison function: order two struct cutpos entries by ascending
+   starting position.  Returns the difference of the two STARTPOS values. */
+static int
+poscmp (a, b)
+     void *a, *b;
+{
+  return (((struct cutpos *)a)->startpos - ((struct cutpos *)b)->startpos);
+}
+
+/* Parse ARG, a comma-separated list of positions and ranges (N, N-, N-M,
+   -M; numbers are 1-based), into a newly-allocated array of struct cutpos
+   sorted by starting position, and store the array in *OPP.  The stored
+   positions are zero-based.  ARG is modified in place by strsep.
+   Returns the number of entries, or -1 after printing an error message.
+   *OPP is assigned in every case (possibly a null pointer or a partial
+   list); the caller is responsible for freeing it. */
+static int
+getlist (arg, opp)
+     char *arg;
+     struct cutpos **opp;
+{
+  char *ntok, *ltok, *larg;
+  int s, e;
+  intmax_t num;
+  struct cutpos *poslist;
+  int npos, nsize;
+
+  poslist = 0;
+  nsize = npos = 0;
+  s = e = 0;
+  larg = arg;
+  while ((ltok = strsep (&larg, ",")) != 0)
+    {
+      if (*ltok == 0)
+        continue;		/* ignore empty list elements */
+
+      /* Split the element at the first `-'.  NTOK is the text before it;
+         LTOK becomes the text after it, or null if there was no `-'. */
+      ntok = strsep (&ltok, "-");
+      if (*ntok == 0)
+        s = BOL;		/* `-M': from the beginning */
+      else
+        {
+          if (legal_number (ntok, &num) == 0 || (int)num != num || num <= 0)
+            {
+              builtin_error ("%s: invalid list value", ntok);
+              *opp = poslist;
+              return -1;
+            }
+          s = num;
+          s--;		/* fields are 1-based */
+        }
+      if (ltok == 0)
+        e = NORANGE;		/* `N': a single position */
+      else if (*ltok == 0)
+        e = EOL;		/* `N-': through the end */
+      else
+        {
+          if (legal_number (ltok, &num) == 0 || (int)num != num || num <= 0)
+            {
+              builtin_error ("%s: invalid list value", ltok);
+              *opp = poslist;
+              return -1;
+            }
+          e = num;
+          e--;
+          if (e == s)
+            e = NORANGE;	/* `N-N' is the same as `N' */
+        }
+
+      /* Grow the list four entries at a time. */
+      if (npos == nsize)
+        {
+          nsize += 4;
+          poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof (struct cutpos));
+        }
+      poslist[npos].startpos = s;
+      poslist[npos].endpos = e;
+      npos++;
+    }
+  if (npos == 0)
+    {
+      builtin_error ("missing list of positions");
+      *opp = poslist;
+      return -1;
+    }
+
+  qsort (poslist, npos, sizeof(poslist[0]), poscmp);
+  *opp = poslist;
+
+  return npos;
+}
+
+/* Select the bytes of LINE named by OPS->poslist, preserving their order in
+   LINE.  The result is stored into element 0 of the array variable V, or,
+   if V is null, printed on stdout followed by a newline.  Returns the
+   number of array elements assigned (0 when printing). */
+static int
+cutbytes (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  arrayind_t ind;
+  char *buf, *bmap;
+  size_t llen;
+  int i, b, n, s, e;
+
+  ind = 0;		/* also the return value when nothing is assigned */
+
+  llen = strlen (line);
+  buf = xmalloc (llen + 1);
+  bmap = xmalloc (llen + 1);
+  memset (bmap, 0, llen + 1);
+
+  for (n = 0; n < ops->npos; n++)
+    {
+      s = ops->poslist[n].startpos;		/* no translation needed yet */
+      e = ops->poslist[n].endpos;
+      /* A position that starts beyond the end of the line selects nothing;
+         without this check bmap[s] would be written out of bounds. */
+      if (s < 0 || (size_t)s >= llen)
+        continue;
+      if (e == NORANGE)
+        e = s;
+      else if (e == EOL || (size_t)e >= llen)
+        e = llen - 1;
+      /* even if a column is specified multiple times, it will only be printed
+         once */
+      for (i = s; i <= e; i++)
+        bmap[i] = 1;
+    }
+
+  b = 0;
+  for (i = 0; (size_t)i < llen; i++)
+    if (bmap[i])
+      buf[b++] = line[i];
+  buf[b] = 0;
+
+  if (v)
+    {
+      bind_array_element (v, ind, buf, 0);
+      ind++;
+    }
+  else
+    printf ("%s\n", buf);
+
+  free (buf);
+  free (bmap);
+
+  return ind;
+}
+
+/* Like cutbytes, but positions count characters rather than bytes.  Falls
+   back to cutbytes when the locale is single-byte, when the
+   locale_utf8locale / utf8_mbsmbchar test says LINE needs no multibyte
+   handling, or when LINE cannot be converted to or from wide characters.
+   The result is stored into element 0 of V, or printed on stdout followed
+   by a newline if V is null.  Returns the number of array elements
+   assigned (0 when printing). */
+static int
+cutchars (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  arrayind_t ind;
+  char *buf, *bmap;
+  wchar_t *wbuf, *wb2;
+  size_t llen, wlen, mlen;
+  int i, b, n, s, e;
+
+  if (MB_CUR_MAX == 1)
+    return (cutbytes (v, line, ops));
+  if (locale_utf8locale && utf8_mbsmbchar (line) == 0)
+    return (cutbytes (v, line, ops));
+
+  ind = 0;		/* also the return value when nothing is assigned */
+
+  llen = strlen (line);
+  wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t));
+
+  wlen = mbstowcs (wbuf, line, llen);
+  if (MB_INVALIDCH (wlen))
+    {
+      free (wbuf);
+      return (cutbytes (v, line, ops));
+    }
+
+  /* wlen <= llen, so a map of llen + 1 entries covers every character. */
+  bmap = xmalloc (llen + 1);
+  memset (bmap, 0, llen + 1);
+
+  for (n = 0; n < ops->npos; n++)
+    {
+      s = ops->poslist[n].startpos;		/* no translation needed yet */
+      e = ops->poslist[n].endpos;
+      /* A position that starts beyond the last character selects nothing;
+         without this check bmap[s] would be written out of bounds. */
+      if (s < 0 || (size_t)s >= wlen)
+        continue;
+      if (e == NORANGE)
+        e = s;
+      else if (e == EOL || (size_t)e >= wlen)
+        e = wlen - 1;
+      /* even if a column is specified multiple times, it will only be printed
+         once */
+      for (i = s; i <= e; i++)
+        bmap[i] = 1;
+    }
+
+  wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t));
+  b = 0;
+  for (i = 0; (size_t)i < wlen; i++)
+    if (bmap[i])
+      wb2[b++] = wbuf[i];
+  wb2[b] = 0;
+
+  free (wbuf);
+
+  /* The map is no longer needed, so reuse its storage (llen + 1 bytes) for
+     the multibyte result.  Pass the full size so wcstombs has room for the
+     terminating null even when every character was selected, and terminate
+     explicitly in case the conversion filled the buffer. */
+  buf = bmap;
+  mlen = wcstombs (buf, wb2, llen + 1);
+  if (MB_INVALIDCH (mlen))
+    {
+      free (buf);
+      free (wb2);
+      return (cutbytes (v, line, ops));
+    }
+  buf[llen] = '\0';
+
+  if (v)
+    {
+      bind_array_element (v, ind, buf, 0);
+      ind++;
+    }
+  else
+    printf ("%s\n", buf);
+
+  free (buf);
+  free (wb2);
+
+  return ind;
+}
+
+/* The basic strategy is to cut the line into fields using strsep, populate
+   an array of fields from 0..nf, then select those fields using the same
+   bitmap approach as cut{bytes,chars} and assign them to the array variable
+   V or print them on stdout.  This function obeys SFLAG: a line containing
+   no delimiter is skipped when SFLAG is set and passed through whole
+   otherwise.  LINE is modified (delimiters are overwritten by strsep).
+   Returns the number of array elements assigned when LINE contains no
+   delimiter, and the number of fields in LINE otherwise. */
+static int
+cutfields (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  arrayind_t ind;
+  char *buf, *bmap, *field, **fields, delim[2];
+  size_t fsize;
+  int i, b, n, s, e, nf;
+
+  ind = 0;
+
+  delim[0] = ops->delim;
+  delim[1] = '\0';
+
+  fields = 0;
+  nf = 0;
+  fsize = 0;
+
+  buf = line;
+  do
+    {
+      field = strsep (&buf, delim);	/* destructive */
+      if ((size_t)nf == fsize)
+        {
+          fsize += 8;
+          fields = xrealloc (fields, fsize * sizeof (char *));
+        }
+      fields[nf] = field;		/* the final entry is the null terminator */
+      if (field)
+        nf++;
+    }
+  while (field);
+
+  /* Exactly one field means LINE contains no delimiter. */
+  if (nf == 1)
+    {
+      free (fields);
+      if (ops->flags & SFLAG)
+        return ind;
+      if (v)
+        {
+          bind_array_element (v, ind, line, 0);
+          ind++;
+        }
+      else
+        printf ("%s\n", line);
+      return ind;
+    }
+
+  bmap = xmalloc (nf + 1);
+  memset (bmap, 0, nf + 1);
+
+  for (n = 0; n < ops->npos; n++)
+    {
+      s = ops->poslist[n].startpos;		/* no translation needed yet */
+      e = ops->poslist[n].endpos;
+      /* A field number beyond the last field selects nothing; without this
+         check bmap[s] would be written out of bounds. */
+      if (s < 0 || s >= nf)
+        continue;
+      if (e == NORANGE)
+        e = s;
+      else if (e == EOL || e >= nf)
+        e = nf - 1;
+      /* even if a column is specified multiple times, it will only be printed
+         once */
+      for (i = s; i <= e; i++)
+        bmap[i] = 1;
+    }
+
+  /* I is non-zero until the first selected field has been output, so the
+     delimiter is printed only between fields. */
+  for (i = 1, b = 0; b < nf; b++)
+    {
+      if (bmap[b] == 0)
+        continue;
+      if (v)
+        {
+          bind_array_element (v, ind, fields[b], 0);
+          ind++;
+        }
+      else
+        {
+          if (i == 0)
+            putchar (ops->delim);
+          printf ("%s", fields[b]);
+        }
+      i = 0;
+    }
+  if (v == 0)
+    putchar ('\n');
+
+  free (bmap);
+  free (fields);
+
+  return nf;
+}
+
+/* Process one LINE according to OPS->flags: by bytes if BFLAG is set, else
+   by characters if CFLAG is set, else by fields.  Returns EXECUTION_SUCCESS
+   unless the selected function returns a negative value. */
+static int
+cutline (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  int status;
+
+  status = (ops->flags & BFLAG) ? cutbytes (v, line, ops)
+         : (ops->flags & CFLAG) ? cutchars (v, line, ops)
+         : cutfields (v, line, ops);
+
+  if (status < 0)
+    return (EXECUTION_FAILURE);
+  return (EXECUTION_SUCCESS);
+}
+
+/* Run cutline on every line read from each file named in LIST.  A null
+   LIST, or a file name of `-', means file descriptor 0.  Returns
+   EXECUTION_FAILURE as soon as a file cannot be opened (after printing an
+   error) and EXECUTION_SUCCESS otherwise; the status of the individual
+   cutline calls is not checked. */
+static int
+cutfile (v, list, ops)
+     SHELL_VAR *v;
+     WORD_LIST *list;
+     struct cutop *ops;
+{
+  int fd, unbuffered_read;
+  char *line;
+  size_t llen;
+  WORD_LIST *l;
+  ssize_t n;
+
+  line = 0;
+  llen = 0;
+
+  l = list;
+  do
+    {
+      /* for each file */
+      if (l == 0 || (l->word->word[0] == '-' && l->word->word[1] == '\0'))
+        fd = 0;
+      else
+        fd = open (l->word->word, O_RDONLY);
+      if (fd < 0)
+        {
+          file_error (l->word->word);
+          free (line);		/* buffer left over from earlier files */
+          return (EXECUTION_FAILURE);
+        }
+
+#ifndef __CYGWIN__
+      /* Read unbuffered when the descriptor is not seekable. */
+      unbuffered_read = (lseek (fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE);
+#else
+      unbuffered_read = 1;
+#endif
+
+      while ((n = zgetline (fd, &line, &llen, '\n', unbuffered_read)) != -1)
+        cutline (v, line, ops);	/* can modify line */
+      if (fd > 0)
+        close (fd);
+
+      if (l)
+        l = l->next;
+    }
+  while (l);
+
+  free (line);
+  return EXECUTION_SUCCESS;
+}
+
+#define OPTSET(x) ((cutflags & (x)) ? 1 : 0)
+
+/* Common implementation of the `lcut' and `cut' builtins.  WHICH is 0 for
+   lcut, which cuts the string given as the first non-option argument, and
+   non-zero for cut, which passes the remaining arguments to cutfile().
+   Exactly one of -b, -c and -f must be supplied.  Returns EX_USAGE for
+   option errors, EXECUTION_FAILURE for an invalid list or an unusable
+   array variable, and otherwise the status of cutline() or cutfile(). */
+static int
+cut_internal (which, list)
+     int which;		/* 0 == lcut (string), non-zero == cut (files) */
+     WORD_LIST *list;
+{
+  int opt, rval, cutflags, delim, npos;
+  char *array_name, *cutstring, *list_arg;
+  SHELL_VAR *v;
+  struct cutop op;
+  struct cutpos *poslist;
+
+  v = 0;
+  rval = EXECUTION_SUCCESS;
+
+  cutflags = 0;
+  array_name = 0;
+  list_arg = 0;
+  delim = '\t';
+
+  reset_internal_getopt ();
+  while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1)
+    {
+      switch (opt)
+        {
+        case 'a':
+          array_name = list_optarg;
+          break;
+        case 'b':
+          cutflags |= BFLAG;
+          list_arg = list_optarg;
+          break;
+        case 'c':
+          cutflags |= CFLAG;
+          list_arg = list_optarg;
+          break;
+        case 'd':
+          cutflags |= DFLAG;
+          delim = list_optarg[0];
+          if (delim == 0 || list_optarg[1])
+            {
+              builtin_error ("delimiter must be a single non-null character");
+              return (EX_USAGE);
+            }
+          break;
+        case 'f':
+          cutflags |= FFLAG;
+          list_arg = list_optarg;
+          break;
+        case 'n':		/* accepted and ignored */
+          break;
+        case 's':
+          cutflags |= SFLAG;
+          break;
+        CASE_HELPOPT;
+        default:
+          builtin_usage ();
+          return (EX_USAGE);
+        }
+    }
+  list = loptend;
+
+  if (array_name && (legal_identifier (array_name) == 0))
+    {
+      sh_invalidid (array_name);
+      return (EXECUTION_FAILURE);
+    }
+
+  if (list == 0 && which == 0)
+    {
+      builtin_error ("string argument required");
+      return (EX_USAGE);
+    }
+
+  /* options are mutually exclusive and one is required */
+  if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1)
+    {
+      builtin_usage ();
+      return (EX_USAGE);
+    }
+
+  /* From here on POSLIST is owned by this function and must be freed on
+     every return path. */
+  if ((npos = getlist (list_arg, &poslist)) < 0)
+    {
+      free (poslist);
+      return (EXECUTION_FAILURE);
+    }
+
+  if (array_name)
+    {
+      v = find_or_make_array_variable (array_name, 1);
+      if (v == 0 || readonly_p (v) || noassign_p (v))
+        {
+          if (v && readonly_p (v))
+            err_readonly (array_name);
+          free (poslist);
+          return (EXECUTION_FAILURE);
+        }
+      else if (array_p (v) == 0)
+        {
+          builtin_error ("%s: not an indexed array", array_name);
+          free (poslist);
+          return (EXECUTION_FAILURE);
+        }
+      if (invisible_p (v))
+        VUNSETATTR (v, att_invisible);
+      array_flush (array_cell (v));
+    }
+
+  op.flags = cutflags;
+  op.delim = delim;
+  op.npos = npos;
+  op.poslist = poslist;
+
+  /* we implement cut as a builtin with a cutfile() function that opens each
+     filename in LIST as a filename (or `-' for stdin) and runs cutline on
+     every line in the file. */
+  if (which == 0)
+    {
+      cutstring = list->word->word;
+      if (cutstring == 0 || *cutstring == 0)
+        {
+          free (poslist);
+          return (EXECUTION_SUCCESS);
+        }
+      rval = cutline (v, cutstring, &op);
+    }
+  else
+    rval = cutfile (v, list, &op);
+
+  free (poslist);
+
+  return (rval);
+}
+
+/* Entry point for the `lcut' builtin: calls cut_internal with WHICH == 0 and
+   returns its status. */
+int
+lcut_builtin (list)
+ WORD_LIST *list;
+{
+ return (cut_internal (0, list));
+}
+
+/* Entry point for the `cut' builtin: calls cut_internal with WHICH == 1 and
+   returns its status. */
+int
+cut_builtin (list)
+ WORD_LIST *list;
+{
+ return (cut_internal (1, list));
+}
+
+/* Called when builtin is enabled and loaded from the shared object. If this
+ function returns 0, the load fails. */
+/* NAME is not used; this implementation has nothing to set up and always
+   returns 1. */
+int
+lcut_builtin_load (name)
+ char *name;
+{
+ return (1);
+}
+
+/* Called when builtin is disabled. */
+/* NAME is not used; there is nothing to release, so the body is empty. */
+void
+lcut_builtin_unload (name)
+ char *name;
+{
+}
+
+char *lcut_doc[] = {
+ "Extract selected fields from a string.",
+ "",
+ "Select portions of LINE (as specified by LIST) and assign them to",
+ "elements of the indexed array ARRAY starting at index 0, or write",
+ "them to the standard output if -a is not specified.",
+ "",
+ "Items specified by LIST are either column positions or fields delimited",
+ "by a special character, and are described more completely in cut(1).",
+ "",
+ "Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
+ "field delimiter is specified by -d (default TAB). Column numbering",
+ "starts at 1.",
+ (char *)NULL
+};
+
+struct builtin lcut_struct = {
+ "lcut", /* builtin name */
+ lcut_builtin, /* function implementing the builtin */
+ BUILTIN_ENABLED, /* initial flags for builtin */
+ lcut_doc, /* array of long documentation strings. */
+ "lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */
+ 0 /* reserved for internal use */
+};
+
+char *cut_doc[] = {
+ "Extract selected fields from each line of a file.",
+ "",
+ "Select portions of each line (as specified by LIST) from each FILE",
+ "and write them to the standard output. cut reads from the standard",
+ "input if no FILE arguments are specified or if a FILE argument is a",
+ "single hyphen.",
+ "",
+ "Items specified by LIST are either column positions or fields delimited",
+ "by a special character, and are described more completely in cut(1).",
+ "",
+ "Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
+ "field delimiter is specified by -d (default TAB). Column numbering",
+ "starts at 1.",
+ (char *)NULL
+};
+
+struct builtin cut_struct = {
+ "cut", /* builtin name */
+ cut_builtin, /* function implementing the builtin */
+ BUILTIN_ENABLED, /* initial flags for builtin */
+ cut_doc, /* array of long documentation strings. */
+ "cut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] [file ...]", /* usage synopsis; becomes short_doc */
+ 0 /* reserved for internal use */
+};
--- /dev/null
+/* lcut - extract specified fields from a line and assign them to an array or
+ print them to the standard output */
+
+/*
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* See Makefile for compilation details. */
+
+#include <config.h>
+
+#if defined (HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#include "bashansi.h"
+#include <stdio.h>
+
+#include "loadables.h"
+#include "shmbutil.h"
+
+#define CUT_ARRAY_DEFAULT "CUTFIELDS"
+
+#define NOPOS -2 /* sentinel for unset startpos/endpos */
+
+#define BOL 0
+#define EOL INT_MAX
+#define NORANGE -1 /* just a position, no range */
+
+#define BFLAG (1 << 0)
+#define CFLAG (1 << 1)
+#define DFLAG (1 << 2)
+#define FFLAG (1 << 3)
+#define SFLAG (1 << 4)
+
+/* One entry of a parsed position list, as built by getlist().  ENDPOS is
+   NORANGE for a single position and EOL for an open-ended range (`N-'). */
+struct cutpos
+{
+ int startpos, endpos; /* zero-based, correction done in getlist() */
+};
+
+/* Describes one cut operation; filled in by cut_internal() and passed to
+   the per-line functions.
+     flags    bitwise OR of BFLAG, CFLAG, DFLAG, FFLAG, SFLAG
+     delim    field delimiter character used by cutfields()
+     npos     number of entries in POSLIST
+     poslist  position list returned by getlist() */
+struct cutop
+{
+ int flags;
+ int delim;
+ int npos;
+ struct cutpos *poslist;
+};
+
+/* qsort comparison function: order two struct cutpos entries by ascending
+   starting position.  Returns the difference of the two STARTPOS values. */
+static int
+poscmp (a, b)
+     void *a, *b;
+{
+  return (((struct cutpos *)a)->startpos - ((struct cutpos *)b)->startpos);
+}
+
+/* Parse ARG, a comma-separated list of positions and ranges (N, N-, N-M,
+   -M; numbers are 1-based), into a newly-allocated array of struct cutpos
+   sorted by starting position, and store the array in *OPP.  The stored
+   positions are zero-based.  ARG is modified in place by strsep.
+   Returns the number of entries, or -1 after printing an error message.
+   *OPP is assigned in every case (possibly a null pointer or a partial
+   list); the caller is responsible for freeing it. */
+static int
+getlist (arg, opp)
+     char *arg;
+     struct cutpos **opp;
+{
+  char *ntok, *ltok, *larg;
+  int s, e;
+  intmax_t num;
+  struct cutpos *poslist;
+  int npos, nsize;
+
+  poslist = 0;
+  nsize = npos = 0;
+  s = e = 0;
+  larg = arg;
+  while ((ltok = strsep (&larg, ",")) != 0)
+    {
+      if (*ltok == 0)
+        continue;		/* ignore empty list elements */
+
+      /* Split the element at the first `-'.  NTOK is the text before it;
+         LTOK becomes the text after it, or null if there was no `-'. */
+      ntok = strsep (&ltok, "-");
+      if (*ntok == 0)
+        s = BOL;		/* `-M': from the beginning */
+      else
+        {
+          if (legal_number (ntok, &num) == 0 || (int)num != num || num <= 0)
+            {
+              builtin_error ("%s: invalid list value", ntok);
+              *opp = poslist;
+              return -1;
+            }
+          s = num;
+          s--;		/* fields are 1-based */
+        }
+      if (ltok == 0)
+        e = NORANGE;		/* `N': a single position */
+      else if (*ltok == 0)
+        e = EOL;		/* `N-': through the end */
+      else
+        {
+          if (legal_number (ltok, &num) == 0 || (int)num != num || num <= 0)
+            {
+              builtin_error ("%s: invalid list value", ltok);
+              *opp = poslist;
+              return -1;
+            }
+          e = num;
+          e--;
+          if (e == s)
+            e = NORANGE;	/* `N-N' is the same as `N' */
+        }
+
+      /* Grow the list four entries at a time. */
+      if (npos == nsize)
+        {
+          nsize += 4;
+          poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof (struct cutpos));
+        }
+      poslist[npos].startpos = s;
+      poslist[npos].endpos = e;
+      npos++;
+    }
+  if (npos == 0)
+    {
+      builtin_error ("missing list of positions");
+      *opp = poslist;
+      return -1;
+    }
+
+  qsort (poslist, npos, sizeof(poslist[0]), poscmp);
+  *opp = poslist;
+
+  return npos;
+}
+
+/* Select the bytes of LINE named by OPS->poslist, preserving their order in
+   LINE.  The result is stored into element 0 of the array variable V, or,
+   if V is null, printed on stdout followed by a newline.  Returns the
+   number of array elements assigned (0 when printing). */
+static int
+cutbytes (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  arrayind_t ind;
+  char *buf, *bmap;
+  size_t llen;
+  int i, b, n, s, e;
+
+  ind = 0;		/* also the return value when nothing is assigned */
+
+  llen = strlen (line);
+  buf = xmalloc (llen + 1);
+  bmap = xmalloc (llen + 1);
+  memset (bmap, 0, llen + 1);
+
+  for (n = 0; n < ops->npos; n++)
+    {
+      s = ops->poslist[n].startpos;		/* no translation needed yet */
+      e = ops->poslist[n].endpos;
+      /* A position that starts beyond the end of the line selects nothing;
+         without this check bmap[s] would be written out of bounds. */
+      if (s < 0 || (size_t)s >= llen)
+        continue;
+      if (e == NORANGE)
+        e = s;
+      else if (e == EOL || (size_t)e >= llen)
+        e = llen - 1;
+      /* even if a column is specified multiple times, it will only be printed
+         once */
+      for (i = s; i <= e; i++)
+        bmap[i] = 1;
+    }
+
+  b = 0;
+  for (i = 0; (size_t)i < llen; i++)
+    if (bmap[i])
+      buf[b++] = line[i];
+  buf[b] = 0;
+
+  if (v)
+    {
+      bind_array_element (v, ind, buf, 0);
+      ind++;
+    }
+  else
+    printf ("%s\n", buf);
+
+  free (buf);
+  free (bmap);
+
+  return ind;
+}
+
+/* Like cutbytes, but positions count characters rather than bytes.  Falls
+   back to cutbytes when the locale is single-byte, when the
+   locale_utf8locale / utf8_mbsmbchar test says LINE needs no multibyte
+   handling, or when LINE cannot be converted to or from wide characters.
+   The result is stored into element 0 of V, or printed on stdout followed
+   by a newline if V is null.  Returns the number of array elements
+   assigned (0 when printing). */
+static int
+cutchars (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  arrayind_t ind;
+  char *buf, *bmap;
+  wchar_t *wbuf, *wb2;
+  size_t llen, wlen, mlen;
+  int i, b, n, s, e;
+
+  if (MB_CUR_MAX == 1)
+    return (cutbytes (v, line, ops));
+  if (locale_utf8locale && utf8_mbsmbchar (line) == 0)
+    return (cutbytes (v, line, ops));
+
+  ind = 0;		/* also the return value when nothing is assigned */
+
+  llen = strlen (line);
+  wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t));
+
+  wlen = mbstowcs (wbuf, line, llen);
+  if (MB_INVALIDCH (wlen))
+    {
+      free (wbuf);
+      return (cutbytes (v, line, ops));
+    }
+
+  /* wlen <= llen, so a map of llen + 1 entries covers every character. */
+  bmap = xmalloc (llen + 1);
+  memset (bmap, 0, llen + 1);
+
+  for (n = 0; n < ops->npos; n++)
+    {
+      s = ops->poslist[n].startpos;		/* no translation needed yet */
+      e = ops->poslist[n].endpos;
+      /* A position that starts beyond the last character selects nothing;
+         without this check bmap[s] would be written out of bounds. */
+      if (s < 0 || (size_t)s >= wlen)
+        continue;
+      if (e == NORANGE)
+        e = s;
+      else if (e == EOL || (size_t)e >= wlen)
+        e = wlen - 1;
+      /* even if a column is specified multiple times, it will only be printed
+         once */
+      for (i = s; i <= e; i++)
+        bmap[i] = 1;
+    }
+
+  wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t));
+  b = 0;
+  for (i = 0; (size_t)i < wlen; i++)
+    if (bmap[i])
+      wb2[b++] = wbuf[i];
+  wb2[b] = 0;
+
+  free (wbuf);
+
+  /* The map is no longer needed, so reuse its storage (llen + 1 bytes) for
+     the multibyte result.  Pass the full size so wcstombs has room for the
+     terminating null even when every character was selected, and terminate
+     explicitly in case the conversion filled the buffer. */
+  buf = bmap;
+  mlen = wcstombs (buf, wb2, llen + 1);
+  if (MB_INVALIDCH (mlen))
+    {
+      free (buf);
+      free (wb2);
+      return (cutbytes (v, line, ops));
+    }
+  buf[llen] = '\0';
+
+  if (v)
+    {
+      bind_array_element (v, ind, buf, 0);
+      ind++;
+    }
+  else
+    printf ("%s\n", buf);
+
+  free (buf);
+  free (wb2);
+
+  return ind;
+}
+
+/* The basic strategy is to cut the line into fields using strsep, populate
+   an array of fields from 0..nf, then select those fields using the same
+   bitmap approach as cut{bytes,chars} and assign them to the array variable
+   V or print them on stdout.  This function obeys SFLAG: a line containing
+   no delimiter is skipped when SFLAG is set and passed through whole
+   otherwise.  LINE is modified (delimiters are overwritten by strsep).
+   Returns the number of array elements assigned when LINE contains no
+   delimiter, and the number of fields in LINE otherwise. */
+static int
+cutfields (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  arrayind_t ind;
+  char *buf, *bmap, *field, **fields, delim[2];
+  size_t fsize;
+  int i, b, n, s, e, nf;
+
+  ind = 0;
+
+  delim[0] = ops->delim;
+  delim[1] = '\0';
+
+  fields = 0;
+  nf = 0;
+  fsize = 0;
+
+  buf = line;
+  do
+    {
+      field = strsep (&buf, delim);	/* destructive */
+      if ((size_t)nf == fsize)
+        {
+          fsize += 8;
+          fields = xrealloc (fields, fsize * sizeof (char *));
+        }
+      fields[nf] = field;		/* the final entry is the null terminator */
+      if (field)
+        nf++;
+    }
+  while (field);
+
+  /* Exactly one field means LINE contains no delimiter. */
+  if (nf == 1)
+    {
+      free (fields);
+      if (ops->flags & SFLAG)
+        return ind;
+      if (v)
+        {
+          bind_array_element (v, ind, line, 0);
+          ind++;
+        }
+      else
+        printf ("%s\n", line);
+      return ind;
+    }
+
+  bmap = xmalloc (nf + 1);
+  memset (bmap, 0, nf + 1);
+
+  for (n = 0; n < ops->npos; n++)
+    {
+      s = ops->poslist[n].startpos;		/* no translation needed yet */
+      e = ops->poslist[n].endpos;
+      /* A field number beyond the last field selects nothing; without this
+         check bmap[s] would be written out of bounds. */
+      if (s < 0 || s >= nf)
+        continue;
+      if (e == NORANGE)
+        e = s;
+      else if (e == EOL || e >= nf)
+        e = nf - 1;
+      /* even if a column is specified multiple times, it will only be printed
+         once */
+      for (i = s; i <= e; i++)
+        bmap[i] = 1;
+    }
+
+  /* I is non-zero until the first selected field has been output, so the
+     delimiter is printed only between fields. */
+  for (i = 1, b = 0; b < nf; b++)
+    {
+      if (bmap[b] == 0)
+        continue;
+      if (v)
+        {
+          bind_array_element (v, ind, fields[b], 0);
+          ind++;
+        }
+      else
+        {
+          if (i == 0)
+            putchar (ops->delim);
+          printf ("%s", fields[b]);
+        }
+      i = 0;
+    }
+  if (v == 0)
+    putchar ('\n');
+
+  free (bmap);
+  free (fields);
+
+  return nf;
+}
+
+/* Process one LINE according to OPS->flags: by bytes if BFLAG is set, else
+   by characters if CFLAG is set, else by fields.  Returns EXECUTION_SUCCESS
+   unless the selected function returns a negative value. */
+static int
+cutline (v, line, ops)
+     SHELL_VAR *v;
+     char *line;
+     struct cutop *ops;
+{
+  int status;
+
+  status = (ops->flags & BFLAG) ? cutbytes (v, line, ops)
+         : (ops->flags & CFLAG) ? cutchars (v, line, ops)
+         : cutfields (v, line, ops);
+
+  if (status < 0)
+    return (EXECUTION_FAILURE);
+  return (EXECUTION_SUCCESS);
+}
+
+/* Placeholder for reading lines from the files named in LIST and running
+   cutline on each one.  Not implemented in this file, and nothing here
+   calls it; it reports failure so that a function declared to return int
+   does not fall off its end without a value. */
+static int
+cutfile (v, list, ops)
+     SHELL_VAR *v;
+     WORD_LIST *list;
+     struct cutop *ops;
+{
+  return (EXECUTION_FAILURE);
+}
+
+#define OPTSET(x) ((cutflags & (x)) ? 1 : 0)
+
+/* Implementation of the `lcut' builtin.  Parses the options, builds the
+   position list, prepares the optional destination array, and runs
+   cutline on the first non-option argument.  Exactly one of -b, -c and -f
+   must be supplied.  Returns EX_USAGE for option errors, EXECUTION_FAILURE
+   for an invalid list or an unusable array variable, and otherwise the
+   status of cutline(). */
+static int
+cut_internal (which, list)
+     int which;		/* only 0 (cut a string) is acted on so far */
+     WORD_LIST *list;
+{
+  int opt, rval, cutflags, delim, npos;
+  char *array_name, *cutstring, *list_arg;
+  SHELL_VAR *v;
+  struct cutop op;
+  struct cutpos *poslist;
+
+  v = 0;
+  rval = EXECUTION_SUCCESS;
+
+  cutflags = 0;
+  array_name = 0;
+  list_arg = 0;
+  delim = '\t';
+
+  reset_internal_getopt ();
+  while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1)
+    {
+      switch (opt)
+        {
+        case 'a':
+          array_name = list_optarg;
+          break;
+        case 'b':
+          cutflags |= BFLAG;
+          list_arg = list_optarg;
+          break;
+        case 'c':
+          cutflags |= CFLAG;
+          list_arg = list_optarg;
+          break;
+        case 'd':
+          cutflags |= DFLAG;
+          delim = list_optarg[0];
+          if (delim == 0 || list_optarg[1])
+            {
+              builtin_error ("delimiter must be a single non-null character");
+              return (EX_USAGE);
+            }
+          break;
+        case 'f':
+          cutflags |= FFLAG;
+          list_arg = list_optarg;
+          break;
+        case 'n':		/* accepted and ignored */
+          break;
+        case 's':
+          cutflags |= SFLAG;
+          break;
+        CASE_HELPOPT;
+        default:
+          builtin_usage ();
+          return (EX_USAGE);
+        }
+    }
+  list = loptend;
+
+  if (array_name && (legal_identifier (array_name) == 0))
+    {
+      sh_invalidid (array_name);
+      return (EXECUTION_FAILURE);
+    }
+
+  if (list == 0)
+    {
+      builtin_error ("string argument required");
+      return (EX_USAGE);
+    }
+
+  /* options are mutually exclusive and one is required */
+  if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1)
+    {
+      builtin_usage ();
+      return (EX_USAGE);
+    }
+
+  /* From here on POSLIST is owned by this function and must be freed on
+     every return path. */
+  if ((npos = getlist (list_arg, &poslist)) < 0)
+    {
+      free (poslist);
+      return (EXECUTION_FAILURE);
+    }
+
+  if (array_name)
+    {
+      v = find_or_make_array_variable (array_name, 1);
+      if (v == 0 || readonly_p (v) || noassign_p (v))
+        {
+          if (v && readonly_p (v))
+            err_readonly (array_name);
+          free (poslist);
+          return (EXECUTION_FAILURE);
+        }
+      else if (array_p (v) == 0)
+        {
+          builtin_error ("%s: not an indexed array", array_name);
+          free (poslist);
+          return (EXECUTION_FAILURE);
+        }
+      if (invisible_p (v))
+        VUNSETATTR (v, att_invisible);
+      array_flush (array_cell (v));
+    }
+
+  cutstring = list->word->word;
+
+  if (cutstring == 0 || *cutstring == 0)
+    {
+      free (poslist);
+      return (EXECUTION_SUCCESS);
+    }
+
+  op.flags = cutflags;
+  op.delim = delim;
+  op.npos = npos;
+  op.poslist = poslist;
+
+  /* we can eventually implement cut as a builtin with a cutfile() function
+     that opens cutstring as a filename (or `-' for stdin) and runs cutline
+     on every line in the file. */
+  if (which == 0)
+    rval = cutline (v, cutstring, &op);
+
+  free (poslist);
+
+  return (rval);
+}
+
+/* Entry point for the `lcut' builtin: calls cut_internal with WHICH == 0 and
+   returns its status. */
+int
+lcut_builtin (list)
+ WORD_LIST *list;
+{
+ return (cut_internal (0, list));
+}
+
+/* Called when builtin is enabled and loaded from the shared object. If this
+ function returns 0, the load fails. */
+/* NAME is not used; this implementation has nothing to set up and always
+   returns 1. */
+int
+lcut_builtin_load (name)
+ char *name;
+{
+ return (1);
+}
+
+/* Called when builtin is disabled. */
+/* NAME is not used; there is nothing to release, so the body is empty. */
+void
+lcut_builtin_unload (name)
+ char *name;
+{
+}
+
+char *lcut_doc[] = {
+ "Extract selected fields from a string.",
+ "",
+ "Select portions of LINE (as specified by LIST) and assign them to",
+ "elements of the indexed array ARRAY starting at index 0, or write",
+ "them to the standard output if -a is not specified.",
+ "",
+ "Items specified by LIST are either column positions or fields delimited",
+ "by a special character, and are described more completely in cut(1).",
+ "",
+ "Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
+ "field delimiter is specified by -d (default TAB). Column numbering",
+ "starts at 1.",
+ (char *)NULL
+};
+
+struct builtin lcut_struct = {
+ "lcut", /* builtin name */
+ lcut_builtin, /* function implementing the builtin */
+ BUILTIN_ENABLED, /* initial flags for builtin */
+ lcut_doc, /* array of long documentation strings. */
+ "lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */
+ 0 /* reserved for internal use */
+};
anything else with it. ASYNC_P says what to do with the tty. If
non-zero, then don't give it away. */
pid_t
-make_child (command, async_p)
+make_child (command, flags)
char *command;
- int async_p;
{
- int forksleep;
+ int async_p, forksleep;
sigset_t set, oset, termset, chldset, oset_copy;
pid_t pid;
SigHandler *oterm;
making_children ();
+ async_p = (flags & FORK_ASYNC);
forksleep = 1;
#if defined (BUFFERED_INPUT)
#define ANY_PID (pid_t)-1
+/* flags for make_child () */
+#define FORK_SYNC 0
+#define FORK_ASYNC 1
+#define FORK_NOJOB 2
+
/* System calls. */
#if !defined (HAVE_UNISTD_H)
extern pid_t fork (), getpid (), getpgrp ();
/* mbutil.c -- readline multibyte character utility functions */
-/* Copyright (C) 2001-2017 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2020 Free Software Foundation, Inc.
This file is part of the GNU Readline Library (Readline), a library
for reading lines of text with interactive input and history editing.
static int
_rl_utf8_mblen (const char *s, size_t n)
{
- unsigned char c, c1;
+ unsigned char c, c1, c2, c3;
if (s == 0)
return (0); /* no shift states */
c1 = (unsigned char)s[1];
if (c < 0xe0)
{
- if (n >= 2 && (s[1] ^ 0x80) < 0x40)
+ if (n == 1)
+ return -2;
+ if (n >= 2 && (c1 ^ 0x80) < 0x40)
return 2;
}
else if (c < 0xf0)
{
- if (n >= 3
- && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ if (n == 1)
+ return -2;
+ if ((c1 ^ 0x80) < 0x40
&& (c >= 0xe1 || c1 >= 0xa0)
&& (c != 0xed || c1 < 0xa0))
- return 3;
+ {
+ if (n == 2)
+ return -2;
+ c2 = (unsigned char)s[2];
+ if ((c2 ^ 0x80) < 0x40)
+ return 3;
+ }
}
- else if (c < 0xf8)
+ else if (c < 0xf4)
{
- if (n >= 4
- && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
- && (s[3] ^ 0x80) < 0x40
+ if (n == 1)
+ return -2;
+ if (((c1 ^ 0x80) < 0x40)
&& (c >= 0xf1 || c1 >= 0x90)
&& (c < 0xf4 || (c == 0xf4 && c1 < 0x90)))
- return 4;
+ {
+ if (n == 2)
+ return -2;
+ c2 = (unsigned char)s[2];
+ if ((c2 ^ 0x80) < 0x40)
+ {
+ if (n == 3)
+ return -2;
+ c3 = (unsigned char)s[3];
+ if ((c3 ^ 0x80) < 0x40)
+ return 4;
+ }
+ }
}
}
/* invalid or incomplete multibyte character */
return point;
}
+/* Convert the multibyte character starting at STRING[IND] (LEN is the
+   length of STRING) and return 1 if the conversion is invalid, yields a
+   null wide character, or yields a character whose WCWIDTH is greater than
+   zero; return 0 otherwise. */
+static inline int
+_rl_test_nonzero (char *string, int ind, int len)
+{
+  mbstate_t state;
+  wchar_t wch;
+  size_t nbytes;
+
+  memset (&state, 0, sizeof (state));
+  nbytes = mbrtowc (&wch, string + ind, len - ind, &state);
+
+  /* treat invalid multibyte sequences as non-zero-width */
+  if (MB_INVALIDCH (nbytes) || MB_NULLWCH (nbytes))
+    return 1;
+
+  return (WCWIDTH (wch) > 0);
+}
+
+/* experimental -- needs to handle zero-width characters better */
+/* Scan backward through STRING from index SEED - 1 and return the index at
+   which the previous character starts.  A byte satisfying UTF8_SINGLEBYTE
+   is returned immediately.  Otherwise the scan backs up over bytes
+   satisfying UTF8_MBCHAR; if it stops on a byte satisfying
+   UTF8_MBFIRSTCHAR that index is returned, unless FIND_NON_ZERO is set and
+   _rl_test_nonzero reports the character there as zero-width, in which
+   case the search continues before it.  If the scan stops on any other
+   byte, the index where this backward step began is returned.  Returns 0
+   if the search runs off the front of STRING.
+   NOTE(review): the exact byte classes matched by the UTF8_* macros are
+   defined outside this hunk -- confirm against their definitions. */
+static int
+_rl_find_prev_utf8char (char *string, int seed, int find_non_zero)
+{
+ char *s;
+ unsigned char b;
+ int save, prev;
+ size_t len;
+
+ /* LEN is only read on the FIND_NON_ZERO path below */
+ if (find_non_zero)
+ len = RL_STRLEN (string);
+
+ prev = seed - 1;
+ while (prev >= 0)
+ {
+ b = (unsigned char)string[prev];
+ if (UTF8_SINGLEBYTE (b))
+ return (prev);
+
+ /* remember where this backward step started */
+ save = prev;
+
+ /* Move back until we're not in the middle of a multibyte char */
+ if (UTF8_MBCHAR (b))
+ {
+ while (prev > 0 && (b = (unsigned char)string[--prev]) && UTF8_MBCHAR (b))
+ ;
+ }
+
+ if (UTF8_MBFIRSTCHAR (b))
+ {
+ if (find_non_zero)
+ {
+ if (_rl_test_nonzero (string, prev, len))
+ return (prev);
+ else /* valid but WCWIDTH (wc) == 0 */
+ prev = prev - 1;
+ }
+ else
+ return (prev);
+ }
+ else
+ return (save); /* invalid utf-8 multibyte sequence */
+ }
+
+ /* the loop ended without returning, so PREV has dropped below zero */
+ return ((prev < 0) ? 0 : prev);
+}
+
/*static*/ int
_rl_find_prev_mbchar_internal (char *string, int seed, int find_non_zero)
{
size_t tmp;
wchar_t wc;
+ if (_rl_utf8locale)
+ return (_rl_find_prev_utf8char (string, seed, find_non_zero));
+
memset(&ps, 0, sizeof(mbstate_t));
length = strlen(string);
anything else with it. ASYNC_P says what to do with the tty. If
non-zero, then don't give it away. */
pid_t
-make_child (command, async_p)
+make_child (command, flags)
char *command;
- int async_p;
+ int flags;
{
pid_t pid;
- int forksleep;
+ int async_p, forksleep;
sigset_t set, oset;
/* Discard saved memory. */
if (command)
free (command);
+ async_p = (flags & FORK_ASYNC);
start_pipeline ();
#if defined (BUFFERED_INPUT)
INIT_DYNAMIC_VAR ("SRANDOM", (char *)NULL, get_urandom, (sh_var_assign_func_t *)NULL);
VSETATTR (v, att_integer);
INIT_DYNAMIC_VAR ("LINENO", (char *)NULL, get_lineno, assign_lineno);
- VSETATTR (v, att_integer|att_regenerate);
+ VSETATTR (v, att_regenerate);
INIT_DYNAMIC_VAR ("BASHPID", (char *)NULL, get_bashpid, null_assign);
VSETATTR (v, att_integer);