text-utils/parse.c

   1 /*
   2  * Copyright (c) 1989 The Regents of the University of California.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the University of
  16  *      California, Berkeley and its contributors.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
   34  /* 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
  35   * - added Native Language Support
  36   */
  37
  38 #include <sys/types.h>
  39 #include <sys/file.h>
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <ctype.h>
  43 #include <string.h>
  44 #include "hexdump.h"
  45 #include "nls.h"
  46
   47 static void escape(char *p1);		/* expand backslash escapes in place */
   48 static void badcnt(const char *s);	/* fatal: bad byte count for a conversion */
   49 static void badsfmt(void);		/* fatal: %s without precision or byte count */
   50 static void badfmt(const char *fmt);	/* fatal: malformed format string */
   51 static void badconv(const char *ch);	/* fatal: unknown conversion character */
   52
   53 FU *endfu;                                      /* format unit for end-of-data; set by rewrite() when it sees %_A */
  54
  55 void addfile(char *name)
  56 {
  57         char *p;
  58         FILE *fp;
  59         int ch;
  60         char buf[2048 + 1];
  61
  62         if ((fp = fopen(name, "r")) == NULL) {
  63                 (void)fprintf(stderr, _("hexdump: can't read %s.\n"), name);
  64                 exit(1);
  65         }
  66         while (fgets(buf, sizeof(buf), fp)) {
  67                 if ((p = index(buf, '\n')) == NULL) {
  68                         (void)fprintf(stderr, _("hexdump: line too long.\n"));
  69                         while ((ch = getchar()) != '\n' && ch != EOF);
  70                         continue;
  71                 }
  72                 *p = '\0';
  73                 for (p = buf; *p && isspace((unsigned char)*p); ++p);
  74                 if (!*p || *p == '#')
  75                         continue;
  76                 add(p);
  77         }
  78         (void)fclose(fp);
  79 }
  80
  81 void add(const char *fmt)
  82 {
  83         const char *p;
  84         static FS **nextfs;
  85         FS *tfs;
  86         FU *tfu, **nextfu;
  87         const char *savep;
  88
  89         /* Start new linked list of format units. */
  90         tfs = emalloc(sizeof(FS));
  91         if (!fshead)
  92                 fshead = tfs;
  93         else
  94                 *nextfs = tfs;
  95         nextfs = &tfs->nextfs;
  96         nextfu = &tfs->nextfu;
  97
  98         /* Take the format string and break it up into format units. */
  99         for (p = fmt;;) {
 100                 /* Skip leading white space. */
 101                 for (; isspace((unsigned char)*p); ++p);
 102                 if (!*p)
 103                         break;
 104
 105                 /* Allocate a new format unit and link it in. */
 106                 tfu = emalloc(sizeof(FU));
 107                 *nextfu = tfu;
 108                 nextfu = &tfu->nextfu;
 109                 tfu->reps = 1;
 110
 111                 /* If leading digit, repetition count. */
 112                 if (isdigit((unsigned char)*p)) {
 113                         for (savep = p; isdigit((unsigned char)*p); ++p);
 114                         if (!isspace((unsigned char)*p) && *p != '/')
 115                                 badfmt(fmt);
 116                         /* may overwrite either white space or slash */
 117                         tfu->reps = atoi(savep);
 118                         tfu->flags = F_SETREP;
 119                         /* skip trailing white space */
 120                         for (++p; isspace((unsigned char)*p); ++p);
 121                 }
 122
 123                 /* Skip slash and trailing white space. */
 124                 if (*p == '/')
 125                         while (isspace((unsigned char)*++p));
 126
 127                 /* byte count */
 128                 if (isdigit((unsigned char)*p)) {
 129                         for (savep = p; isdigit((unsigned char)*p); ++p);
 130                         if (!isspace((unsigned char)*p))
 131                                 badfmt(fmt);
 132                         tfu->bcnt = atoi(savep);
 133                         /* skip trailing white space */
 134                         for (++p; isspace((unsigned char)*p); ++p);
 135                 }
 136
 137                 /* format */
 138                 if (*p != '"')
 139                         badfmt(fmt);
 140                 for (savep = ++p; *p != '"';)
 141                         if (*p++ == 0)
 142                                 badfmt(fmt);
 143                 if (!(tfu->fmt = malloc(p - savep + 1)))
 144                         nomem();
 145                 (void) strncpy(tfu->fmt, savep, p - savep);
 146                 tfu->fmt[p - savep] = '\0';
 147                 escape(tfu->fmt);
 148                 p++;
 149         }
 150 }
 151
  152 static const char *spec = ".#-+ 0123456789";	/* chars skipped between '%' and the conversion; spec + 1 leaves out '.' */
 153
 154 int size(FS *fs)
 155 {
 156         FU *fu;
 157         int bcnt, cursize;
 158         char *fmt;
 159         int prec;
 160
 161         /* figure out the data block size needed for each format unit */
 162         for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
 163                 if (fu->bcnt) {
 164                         cursize += fu->bcnt * fu->reps;
 165                         continue;
 166                 }
 167                 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
 168                         if (*fmt != '%')
 169                                 continue;
 170                         /*
 171                          * skip any special chars -- save precision in
 172                          * case it's a %s format.
 173                          */
 174                         while (index(spec + 1, *++fmt));
 175                         if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
 176                                 prec = atoi(fmt);
 177                                 while (isdigit((unsigned char)*++fmt));
 178                         }
 179                         switch(*fmt) {
 180                         case 'c':
 181                                 bcnt += 1;
 182                                 break;
 183                         case 'd': case 'i': case 'o': case 'u':
 184                         case 'x': case 'X':
 185                                 bcnt += 4;
 186                                 break;
 187                         case 'e': case 'E': case 'f': case 'g': case 'G':
 188                                 bcnt += 8;
 189                                 break;
 190                         case 's':
 191                                 bcnt += prec;
 192                                 break;
 193                         case '_':
 194                                 switch(*++fmt) {
 195                                 case 'c': case 'p': case 'u':
 196                                         bcnt += 1;
 197                                         break;
 198                                 }
 199                         }
 200                 }
 201                 cursize += bcnt * fu->reps;
 202         }
 203         return(cursize);
 204 }
 205
 206 void rewrite(FS *fs)
 207 {
 208         enum { NOTOKAY, USEBCNT, USEPREC } sokay;
 209         PR *pr, **nextpr;
 210         FU *fu;
 211         char *p1, *p2;
 212         char savech, *fmtp, cs[3];
 213         int nconv, prec;
 214
 215         nextpr = NULL;
 216         prec = 0;
 217
 218         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 219                 /*
 220                  * Break each format unit into print units; each
 221                  * conversion character gets its own.
 222                  */
 223                 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
 224                         pr = emalloc(sizeof(PR));
 225                         if (!fu->nextpr)
 226                                 fu->nextpr = pr;
 227                         else
 228                                 *nextpr = pr;
 229
 230                         /* Skip preceding text and up to the next % sign. */
 231                         for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
 232
 233                         /* Only text in the string. */
 234                         if (!*p1) {
 235                                 pr->fmt = fmtp;
 236                                 pr->flags = F_TEXT;
 237                                 break;
 238                         }
 239
 240                         /*
 241                          * Get precision for %s -- if have a byte count, don't
 242                          * need it.
 243                          */
 244                         if (fu->bcnt) {
 245                                 sokay = USEBCNT;
 246                                 /* skip to conversion character */
 247                                 for (++p1; index(spec, *p1); ++p1);
 248                         } else {
 249                                 /* skip any special chars, field width */
 250                                 while (index(spec + 1, *++p1));
 251                                 if (*p1 == '.' &&
 252                                     isdigit((unsigned char)*++p1)) {
 253                                         sokay = USEPREC;
 254                                         prec = atoi(p1);
 255                                         while (isdigit((unsigned char)*++p1));
 256                                 } else
 257                                         sokay = NOTOKAY;
 258                         }
 259
 260                         p2 = p1 + 1;            /* Set end pointer. */
 261                         cs[0] = *p1;            /* Set conversion string. */
 262                         cs[1] = 0;
 263
 264                         /*
 265                          * Figure out the byte count for each conversion;
 266                          * rewrite the format as necessary, set up blank-
 267                          * padding for end of data.
 268                          */
 269                         switch(cs[0]) {
 270                         case 'c':
 271                                 pr->flags = F_CHAR;
 272                                 switch(fu->bcnt) {
 273                                 case 0: case 1:
 274                                         pr->bcnt = 1;
 275                                         break;
 276                                 default:
 277                                         p1[1] = '\0';
 278                                         badcnt(p1);
 279                                 }
 280                                 break;
 281                         case 'd': case 'i':
 282                                 pr->flags = F_INT;
 283                                 goto isint;
 284                         case 'o': case 'u': case 'x': case 'X':
 285                                 pr->flags = F_UINT;
 286 isint:                          cs[2] = '\0';
 287                                 cs[1] = cs[0];
 288                                 cs[0] = 'q';
 289                                 switch(fu->bcnt) {
 290                                 case 0: case 4:
 291                                         pr->bcnt = 4;
 292                                         break;
 293                                 case 1:
 294                                         pr->bcnt = 1;
 295                                         break;
 296                                 case 2:
 297                                         pr->bcnt = 2;
 298                                         break;
 299                                 case 8:
 300                                         pr->bcnt = 8;
 301                                         break;
 302                                 default:
 303                                         p1[1] = '\0';
 304                                         badcnt(p1);
 305                                 }
 306                                 break;
 307                         case 'e': case 'E': case 'f': case 'g': case 'G':
 308                                 pr->flags = F_DBL;
 309                                 switch(fu->bcnt) {
 310                                 case 0: case 8:
 311                                         pr->bcnt = 8;
 312                                         break;
 313                                 case 4:
 314                                         pr->bcnt = 4;
 315                                         break;
 316                                 default:
 317                                         p1[1] = '\0';
 318                                         badcnt(p1);
 319                                 }
 320                                 break;
 321                         case 's':
 322                                 pr->flags = F_STR;
 323                                 switch(sokay) {
 324                                 case NOTOKAY:
 325                                         badsfmt();
 326                                 case USEBCNT:
 327                                         pr->bcnt = fu->bcnt;
 328                                         break;
 329                                 case USEPREC:
 330                                         pr->bcnt = prec;
 331                                         break;
 332                                 }
 333                                 break;
 334                         case '_':
 335                                 ++p2;
 336                                 switch(p1[1]) {
 337                                 case 'A':
 338                                         endfu = fu;
 339                                         fu->flags |= F_IGNORE;
 340                                         /* FALLTHROUGH */
 341                                 case 'a':
 342                                         pr->flags = F_ADDRESS;
 343                                         ++p2;
 344                                         switch(p1[2]) {
 345                                         case 'd': case 'o': case'x':
 346                                                 cs[0] = 'q';
 347                                                 cs[1] = p1[2];
 348                                                 cs[2] = '\0';
 349                                                 break;
 350                                         default:
 351                                                 p1[3] = '\0';
 352                                                 badconv(p1);
 353                                         }
 354                                         break;
 355                                 case 'c':
 356                                         pr->flags = F_C;
 357                                         /* cs[0] = 'c'; set in conv_c */
 358                                         goto isint2;
 359                                 case 'p':
 360                                         pr->flags = F_P;
 361                                         cs[0] = 'c';
 362                                         goto isint2;
 363                                 case 'u':
 364                                         pr->flags = F_U;
 365                                         /* cs[0] = 'c'; set in conv_u */
 366 isint2:                                 switch(fu->bcnt) {
 367                                         case 0: case 1:
 368                                                 pr->bcnt = 1;
 369                                                 break;
 370                                         default:
 371                                                 p1[2] = '\0';
 372                                                 badcnt(p1);
 373                                         }
 374                                         break;
 375                                 default:
 376                                         p1[2] = '\0';
 377                                         badconv(p1);
 378                                 }
 379                                 break;
 380                         default:
 381                                 p1[1] = '\0';
 382                                 badconv(p1);
 383                         }
 384
 385                         /*
 386                          * Copy to PR format string, set conversion character
 387                          * pointer, update original.
 388                          */
 389                         savech = *p2;
 390                         p1[0] = '\0';
 391                         pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
 392                         (void)strcpy(pr->fmt, fmtp);
 393                         (void)strcat(pr->fmt, cs);
 394                         *p2 = savech;
 395                         pr->cchar = pr->fmt + (p1 - fmtp);
 396                         fmtp = p2;
 397
 398                         /* Only one conversion character if byte count */
 399                         if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) {
 400                                 (void)fprintf(stderr,
 401                                     _("hexdump: byte count with multiple conversion characters.\n"));
 402                                 exit(1);
 403                         }
 404                 }
 405                 /*
 406                  * If format unit byte count not specified, figure it out
 407                  * so can adjust rep count later.
 408                  */
 409                 if (!fu->bcnt)
 410                         for (pr = fu->nextpr; pr; pr = pr->nextpr)
 411                                 fu->bcnt += pr->bcnt;
 412         }
 413         /*
 414          * If the format string interprets any data at all, and it's
 415          * not the same as the blocksize, and its last format unit
 416          * interprets any data at all, and has no iteration count,
 417          * repeat it as necessary.
 418          *
 419          * If rep count is greater than 1, no trailing whitespace
 420          * gets output from the last iteration of the format unit.
 421          */
 422         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 423                 if (!fu->nextfu && fs->bcnt < blocksize &&
 424                     !(fu->flags&F_SETREP) && fu->bcnt)
 425                         fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
 426                 if (fu->reps > 1) {
 427                         for (pr = fu->nextpr;; pr = pr->nextpr)
 428                                 if (!pr->nextpr)
 429                                         break;
 430                         for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
 431                                 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
 432                         if (p2)
 433                                 pr->nospace = p2;
 434                 }
 435         }
 436 }
 437
 438
 439 static void escape(char *p1)
 440 {
 441         char *p2;
 442
 443         /* alphabetic escape sequences have to be done in place */
 444         for (p2 = p1;; ++p1, ++p2) {
 445                 if (!*p1) {
 446                         *p2 = *p1;
 447                         break;
 448                 }
 449                 if (*p1 == '\\')
 450                         switch(*++p1) {
 451                         case 'a':
 452                              /* *p2 = '\a'; */
 453                                 *p2 = '\007';
 454                                 break;
 455                         case 'b':
 456                                 *p2 = '\b';
 457                                 break;
 458                         case 'f':
 459                                 *p2 = '\f';
 460                                 break;
 461                         case 'n':
 462                                 *p2 = '\n';
 463                                 break;
 464                         case 'r':
 465                                 *p2 = '\r';
 466                                 break;
 467                         case 't':
 468                                 *p2 = '\t';
 469                                 break;
 470                         case 'v':
 471                                 *p2 = '\v';
 472                                 break;
 473                         default:
 474                                 *p2 = *p1;
 475                                 break;
 476                         }
 477         }
 478 }
 479
 480 static void badcnt(const char *s)
 481 {
 482         (void)fprintf(stderr,
 483             _("hexdump: bad byte count for conversion character %s.\n"), s);
 484         exit(1);
 485 }
 486
 487 static void badsfmt(void)
 488 {
 489         (void)fprintf(stderr,
 490             _("hexdump: %%s requires a precision or a byte count.\n"));
 491         exit(1);
 492 }
 493
 494 static void badfmt(const char *fmt)
 495 {
 496         (void)fprintf(stderr, _("hexdump: bad format {%s}\n"), fmt);
 497         exit(1);
 498 }
 499
 500 static void badconv(const char *ch)
 501 {
 502         (void)fprintf(stderr, _("hexdump: bad conversion character %%%s.\n"), ch);
 503         exit(1);
 504 }