a68: parser: scanner

author Jose E. Marchesi <jose.marchesi@oracle.com>

Sat, 11 Oct 2025 17:46:56 +0000 (19:46 +0200)

committer Jose E. Marchesi <jose.marchesi@oracle.com>

Sun, 30 Nov 2025 00:52:08 +0000 (01:52 +0100)
author Jose E. Marchesi <jose.marchesi@oracle.com>
Sat, 11 Oct 2025 17:46:56 +0000 (19:46 +0200)
committer Jose E. Marchesi <jose.marchesi@oracle.com>
Sun, 30 Nov 2025 00:52:08 +0000 (01:52 +0100)
diff --git a/gcc/algol68/a68-parser-scanner.cc b/gcc/algol68/a68-parser-scanner.cc

new file mode 100644 (file)

index 0000000..966ebdf
--- /dev/null
+++ b/gcc/algol68/a68-parser-scanner.cc
@@ -0,0 +1,2346 @@
+/* Context-dependent ALGOL 68 tokeniser.
+   Copyright (C) 2001-2023 J. Marcel van der Veer.
+   Copyright (C) 2025 Jose E. Marchesi.
+
+   Original implementation by J. Marcel van der Veer.
+   Adapted for GCC by Jose E. Marchesi.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Context-dependent ALGOL 68 tokeniser.  */
+
+
+#define INCLUDE_MEMORY
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic.h"
+#include "options.h"
+#include "vec.h"
+
+#include "a68.h"
+
+/* A few forward references of static functions defined in this file.  */
+
+static void include_files (LINE_T *top);
+
+/* Standard prelude and postlude for source files.
+
+   These are used for particular programs only.  Not for prelude packets.
+   We need several versions for the several supported stropping regimes.  */
+
+static const char *
+upper_prelude_start[] = {
+  "BEGIN",
+  "      BEGIN",
+  NO_TEXT
+};
+
+static const char *
+upper_postlude[] = {
+  "      END;",
+  "      stop: SKIP",
+  "END",
+  NO_TEXT
+};
+
+static const char *
+supper_prelude_start[] = {
+  "begin",
+  "     begin",
+  NO_TEXT
+};
+
+static const char *
+supper_postlude[] = {
+  "      end;",
+  "      stop: skip",
+  "end",
+  NO_TEXT
+};
+
+/* Macros.  */
+
+#define NULL_CHAR '\0'
+#define STOP_CHAR 127
+#define FORMFEED_CHAR '\f'
+#define CR_CHAR '\r'
+#define QUOTE_CHAR '"'
+#define APOSTROPHE_CHAR '\''
+#define BACKSLASH_CHAR '\\'
+#define NEWLINE_CHAR '\n'
+#define EXPONENT_CHAR 'e'
+#define RADIX_CHAR 'r'
+#define POINT_CHAR '.'
+#define TAB_CHAR '\t'
+
+#define MAX_RESTART 256
+
+#define EOL(c) ((c) == NEWLINE_CHAR || (c) == NULL_CHAR)
+#define SCAN_ERROR(c, u, v, txt) if (c)                \
+    do                                         \
+      {                                                \
+       a68_scan_error (u, v, txt);             \
+      }                                                \
+    while (0)
+
+
+#define SCAN_DIGITS(c)                         \
+  while (ISDIGIT (c))                          \
+    {                                          \
+      (sym++)[0] = (c);                                \
+      (c) = next_char (ref_l, ref_s, true);    \
+    }
+
+#define SCAN_EXPONENT_PART(c)                                          \
+  do                                                                   \
+    {                                                                  \
+      (sym++)[0] = EXPONENT_CHAR;                                      \
+      (c) = next_char (ref_l, ref_s, true);                            \
+      if ((c) == '+' || (c) == '-') {                                  \
+       (sym++)[0] = (c);                                               \
+       (c) = next_char (ref_l, ref_s, true);                           \
+      }                                                                        \
+      SCAN_ERROR (!ISDIGIT (c), *start_l, *start_c,                    \
+                 "invalid exponent digit");                            \
+      SCAN_DIGITS (c);                                                 \
+    }                                                                  \
+  while (0)
+
+/* Read bytes from file into buffer.  */
+
+static ssize_t
+io_read (FILE *file, void *buf, size_t n)
+{
+  int fd = fileno (file);
+  size_t to_do = n;
+  int restarts = 0;
+  char *z = (char *) buf;
+  while (to_do > 0)
+    {
+      ssize_t bytes_read;
+
+      errno = 0;
+      bytes_read = read (fd, z, to_do);
+      if (bytes_read < 0)
+       {
+         if (errno == EINTR)
+           {
+             /* interrupt, retry.  */
+             bytes_read = 0;
+             if (restarts++ > MAX_RESTART)
+               {
+                 return -1;
+               }
+           }
+         else
+           {
+             /* read error.  */
+             return -1;
+           }
+       }
+      else if (bytes_read == 0)
+       {
+         /* EOF_CHAR */
+         break;
+       }
+      to_do -= (size_t) bytes_read;
+      z += bytes_read;
+    }
+
+  /* return >= 0  */
+  return (ssize_t) n - (ssize_t) to_do;
+}
+
+/* Save scanner state, for character look-ahead.  */
+
+static void
+save_state (LINE_T *ref_l, char *ref_s, char ch)
+{
+  SCAN_STATE_L (&A68_JOB) = ref_l;
+  SCAN_STATE_S (&A68_JOB) = ref_s;
+  SCAN_STATE_C (&A68_JOB) = ch;
+}
+
+/* Restore scanner state, for character look-ahead.  */
+
+static void
+restore_state (LINE_T **ref_l, char **ref_s, char *ch)
+{
+  *ref_l = SCAN_STATE_L (&A68_JOB);
+  *ref_s = SCAN_STATE_S (&A68_JOB);
+  *ch = SCAN_STATE_C (&A68_JOB);
+}
+
+/* New_source_line.  */
+
+static LINE_T *
+new_source_line (void)
+{
+  LINE_T *z = ggc_cleared_alloc<LINE_T> ();
+
+  MARKER (z)[0] = '\0';
+  STRING (z) = NO_TEXT;
+  FILENAME (z) = NO_TEXT;
+  NUMBER (z) = 0;
+  NEXT (z) = NO_LINE;
+  PREVIOUS (z) = NO_LINE;
+  return z;
+}
+
+/* Append a source line to the internal source file.  */
+
+static void
+append_source_line (const char *str, LINE_T **ref_l, int *line_num,
+                   const char *filename)
+{
+  LINE_T *z = new_source_line ();
+
+  /* Link line into the chain.  */
+  STRING (z) = xstrdup (str);
+  FILENAME (z) = ggc_strdup (filename);
+  NUMBER (z) = (*line_num)++;
+  NEXT (z) = NO_LINE;
+  PREVIOUS (z) = *ref_l;
+  if (TOP_LINE (&A68_JOB) == NO_LINE)
+    TOP_LINE (&A68_JOB) = z;
+  if (*ref_l != NO_LINE)
+    NEXT (*ref_l) = z;
+  *ref_l = z;
+}
+
+/* Append environment source lines.  */
+
+static void
+append_environ (const char *str[], LINE_T **ref_l, int *line_num, const char *name)
+{
+  for (int k = 0; str[k] != NO_TEXT; k++)
+    {
+      int zero_line_num = 0;
+      (*line_num)++;
+      append_source_line (str[k], ref_l, &zero_line_num, name);
+    }
+}
+
+/*
+ * Scanner, tokenises the source code.
+ */
+
+/* Emit a diagnostic if CH is an unworthy character.  */
+
+static void
+unworthy (LINE_T *u, char *v, char ch)
+{
+  if (ISPRINT (ch))
+    {
+      if (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s",
+                   "unworthy character") < 0)
+       gcc_unreachable ();
+    }
+  else
+    {
+      if (snprintf (A68 (edit_line), SNPRINTF_SIZE, "*%s %c",
+                   "unworthy character", ch) < 0)
+       gcc_unreachable ();
+    }
+
+  a68_scan_error (u, v, A68 (edit_line));
+}
+
+/* Concatenate lines that terminate in '\' with next line.  */
+
+static void
+concatenate_lines (LINE_T * top)
+{
+  LINE_T *q;
+  /* Work from bottom backwards.  */
+  for (q = top; q != NO_LINE && NEXT (q) != NO_LINE; FORWARD (q))
+    ;
+
+  for (; q != NO_LINE; BACKWARD (q))
+    {
+      char *z = STRING (q);
+      size_t len = strlen (z);
+
+      if (len >= 2
+         && z[len - 2] == BACKSLASH_CHAR
+         && z[len - 1] == NEWLINE_CHAR
+         && NEXT (q) != NO_LINE
+         && STRING (NEXT (q)) != NO_TEXT)
+       {
+         z[len - 2] = '\0';
+         len += (int) strlen (STRING (NEXT (q)));
+         z = (char *) xmalloc (len + 1);
+         a68_bufcpy (z, STRING (q), len + 1);
+         a68_bufcat (z, STRING (NEXT (q)), len + 1);
+         STRING (NEXT (q))[0] = '\0';
+         STRING (q) = z;
+       }
+    }
+}
+
+/* Size of source file.  */
+
+static int
+get_source_size (void)
+{
+  FILE *f = FILE_SOURCE_FD (&A68_JOB);
+  return (int) lseek (fileno (f), 0, SEEK_END);
+}
+
+/* Read source file FILENAME and make internal copy.  */
+
+static bool
+read_source_file (const char *filename)
+{
+  struct stat statbuf;
+  LINE_T *ref_l = NO_LINE;
+  int line_num = 0;
+  size_t k;
+  size_t bytes_read;
+  ssize_t l;
+  size_t source_file_size;
+  char *buffer;
+  FILE *f;
+  bool ret = true;
+
+  /* First open the given file.  */
+  if (!(FILE_SOURCE_FD (&A68_JOB) = fopen (filename, "r")))
+    fatal_error (UNKNOWN_LOCATION, "could not open source file %s",
+                filename);
+  FILE_SOURCE_NAME (&A68_JOB) = ggc_strdup (filename);
+  f = FILE_SOURCE_FD (&A68_JOB);
+
+  if (fstat (fileno (f), &statbuf)
+      || !(S_ISREG (statbuf.st_mode) || S_ISCHR (statbuf.st_mode)))
+    fatal_error (UNKNOWN_LOCATION, "specified file %s is a directory",
+                filename);
+
+  if ((source_file_size = get_source_size ()) == 0)
+    {
+      /* The source file is empty.  */
+      ret = false;
+      goto done;
+    }
+
+  /* Allocate A68_PARSER (scan_buf), which is an auxiliary buffer used by the
+     scanner known to be big enough to hold any string contained in the source
+     file.  */
+  A68_PARSER (max_scan_buf_length) = source_file_size + 1;
+  A68_PARSER (max_scan_buf_length) += 1024; /* For the environment.  */
+  A68_PARSER (scan_buf) = (char *) xmalloc (A68_PARSER (max_scan_buf_length));
+
+  /* Prelude.  */
+  append_environ (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING
+                 ? upper_prelude_start : supper_prelude_start,
+                 &ref_l, &line_num, "prelude");
+
+  /* Read the file into a single buffer, so we save on system calls.  */
+  line_num = 1;
+  errno = 0;
+  buffer = (char *) xmalloc (8 + source_file_size);
+  if (lseek (fileno (f), 0, SEEK_SET) < 0)
+    gcc_unreachable ();
+  errno = 0;
+  bytes_read = io_read (f, buffer, source_file_size);
+  gcc_assert (errno == 0 && bytes_read == source_file_size);
+
+  /* Link all lines into the list.  */
+  k = 0;
+  while (k < source_file_size)
+    {
+      l = 0;
+      A68_PARSER (scan_buf)[0] = '\0';
+      while (k < source_file_size  && buffer[k] != NEWLINE_CHAR)
+       {
+         if (k < source_file_size - 1
+             && buffer[k] == CR_CHAR && buffer[k + 1] == NEWLINE_CHAR)
+           k++;
+         else
+           {
+             A68_PARSER (scan_buf)[l++] = buffer[k++];
+             A68_PARSER (scan_buf)[l] = '\0';
+           }
+       }
+      A68_PARSER (scan_buf)[l++] = NEWLINE_CHAR;
+      A68_PARSER (scan_buf)[l] = '\0';
+      if (k < source_file_size)
+       k++;
+      append_source_line (A68_PARSER (scan_buf), &ref_l, &line_num,
+                         FILE_SOURCE_NAME (&A68_JOB));
+      SCAN_ERROR (l != (ssize_t) strlen (A68_PARSER (scan_buf)),
+                 NO_LINE, NO_TEXT, "invalid characters in source file");
+    }
+
+  /* Postlude.  */
+  append_environ (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING
+                 ? upper_postlude : supper_postlude,
+                 &ref_l, &line_num, "postlude");
+
+  /* Concatenate lines that end with \.  */
+  concatenate_lines (TOP_LINE (&A68_JOB));
+
+  /* Include files.  */
+  include_files (TOP_LINE (&A68_JOB));
+
+ done:
+  if (fclose (FILE_SOURCE_FD (&A68_JOB)) != 0)
+    gcc_unreachable ();
+  return ret;
+}
+
+/* Get next character from internal copy of source file.
+
+   If ALLOW_TYPO is true then typographical display features are skipped.
+
+   If ALLOW_ONE_UNDER is true then a single underscore character is
+   skipped.  */
+
+static char
+next_char (LINE_T **ref_l, char **ref_s, bool allow_typo,
+          bool allow_one_under = false, bool *found_under = NULL)
+{
+  char ch;
+
+  /* Empty source.  */
+  if (*ref_l == NO_LINE)
+    return STOP_CHAR;
+
+  if ((*ref_s)[0] == NEWLINE_CHAR || (*ref_s)[0] == '\0')
+    {
+      /* Go to new line.  */
+      *ref_l = NEXT (*ref_l);
+      if (*ref_l == NO_LINE)
+        return STOP_CHAR;
+      *ref_s = STRING (*ref_l);
+    }
+  else
+    (*ref_s)++;
+
+  /* Deliver next char.  */
+  ch = (*ref_s)[0];
+  if ((allow_typo && (ISSPACE (ch) || ch == FORMFEED_CHAR))
+      || (allow_one_under && ch == '_'))
+    {
+      if (ch == '_' && found_under != NULL)
+       *found_under = true;
+      ch = next_char (ref_l, ref_s, allow_typo);
+    }
+  return ch;
+}
+
+/* Find first character that can start a valid symbol.  */
+
+static void
+get_good_char (char *ref_c, LINE_T **ref_l, char **ref_s)
+{
+  while (*ref_c != STOP_CHAR && (ISSPACE (*ref_c) || (*ref_c == '\0')))
+    *ref_c = next_char (ref_l, ref_s, false);
+}
+
+/* Case insensitive strncmp for at most the number of chars in V.  */
+
+static int
+streq (const char *u, const char *v)
+{
+  int diff;
+  for (diff = 0; diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR; u++, v++)
+    diff = ((int) TOLOWER (u[0])) - ((int) TOLOWER (v[0]));
+  return diff;
+}
+
+/* Case insensitive strncmp for at most N chars.  */
+
+static int
+strneq (const char *u, const char *v, size_t n)
+{
+  int diff;
+  size_t pos = 0;
+  for (diff = 0;
+       diff == 0 && u[0] != NULL_CHAR && v[0] != NULL_CHAR && pos < n;
+       u++, v++, pos++)
+    diff = ((int) TOLOWER (u[0])) - ((int) TOLOWER (v[0]));
+  return diff;
+}
+
+
+/* Determine whether u is bold tag v, independent of stropping regime.  */
+
+static bool
+is_bold (char *u, const char *v)
+{
+  size_t len = strlen (v);
+
+  if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING)
+    /* UPPER stropping.  */
+    return strncmp (u, v, len) == 0 && !ISUPPER (u[len]);
+  else
+    /* SUPPER stropping.  */
+    return (strlen (u) >= len
+           && ISLOWER (u[0])
+           && strneq (u, v, len) == 0
+           && !ISALPHA (u[len])
+           && !ISDIGIT (u[len]));
+}
+
+/* Skip a string denotation.
+
+   This function returns true if the end of the string denotation is found.
+   Returns false otherwise.  */
+
+static bool
+skip_string (LINE_T **top, char **ch)
+{
+  LINE_T *u = *top;
+  char *v = *ch;
+  v++;
+  while (u != NO_LINE)
+    {
+      while (v[0] != NULL_CHAR)
+       {
+         if (v[0] == QUOTE_CHAR && v[1] != QUOTE_CHAR)
+           {
+             *top = u;
+             *ch = &v[1];
+             return true;
+           }
+         else if (v[0] == QUOTE_CHAR && v[1] == QUOTE_CHAR)
+           {
+             v += 2;
+           }
+         else
+           {
+             v++;
+           }
+       }
+      FORWARD (u);
+      if (u != NO_LINE) {
+       v = &(STRING (u)[0]);
+      } else {
+       v = NO_TEXT;
+      }
+    }
+  return false;
+}
+
+/* Skip a comment.
+
+   This function returns true if the end of the comment is found.  Returns
+   false otherwise.  */
+
+static bool
+skip_comment (LINE_T **top, char **ch, int delim)
+{
+  LINE_T *u = *top;
+  char *v = *ch;
+  int nesting_level = 1;
+  v++;
+  while (u != NO_LINE)
+    {
+      while (v[0] != NULL_CHAR)
+       {
+         LINE_T *l = u;
+         char *c = v;
+
+         if (v[0] == QUOTE_CHAR && skip_string (&l, &c)
+             && (delim == BOLD_COMMENT_BEGIN_SYMBOL || delim == BRIEF_COMMENT_BEGIN_SYMBOL))
+           {
+             u = l;
+             v = c;
+           }
+         else if (is_bold (v, "COMMENT") && delim == BOLD_COMMENT_SYMBOL)
+           {
+             *top = u;
+             *ch = &v[1];
+             return true;
+           }
+         else if (is_bold (v, "CO") && delim == STYLE_I_COMMENT_SYMBOL)
+           {
+             *top = u;
+             *ch = &v[1];
+             return true;
+           }
+         else if (v[0] == '#' && delim == STYLE_II_COMMENT_SYMBOL)
+           {
+             *top = u;
+             *ch = &v[1];
+             return true;
+           }
+         else if (is_bold (v, "ETON") && delim == BOLD_COMMENT_BEGIN_SYMBOL)
+           {
+             gcc_assert (nesting_level > 0);
+             nesting_level -= 1;
+             if (nesting_level == 0)
+               {
+                 *top = u;
+                 *ch = &v[1];
+                 return true;
+               }
+           }
+         else if (v[0] == '}' && delim == BRIEF_COMMENT_BEGIN_SYMBOL)
+           {
+             gcc_assert (nesting_level > 0);
+             nesting_level -= 1;
+             if (nesting_level == 0)
+               {
+                 *top = u;
+                 *ch = &v[1];
+                 return true;
+               }
+           }
+         else
+           {
+             if ((is_bold (v, "NOTE") && delim == BOLD_COMMENT_BEGIN_SYMBOL)
+                 || (v[0] == '{' && delim == BRIEF_COMMENT_BEGIN_SYMBOL))
+               {
+                 nesting_level += 1;
+               }
+
+             v++;
+           }
+       }
+      FORWARD (u);
+      if (u != NO_LINE)
+       v = &(STRING (u)[0]);
+      else
+       v = NO_TEXT;
+    }
+
+  return false;
+}
+
+/* Skip rest of pragmat.
+
+   This function returns true if the end of the pragmat is found, false
+   otherwise.  */
+
+static bool
+skip_pragmat (LINE_T **top, char **ch, int delim, bool whitespace)
+{
+  LINE_T *u = *top;
+  char *v = *ch;
+  while (u != NO_LINE)
+    {
+      while (v[0] != NULL_CHAR)
+       {
+         if (is_bold (v, "PRAGMAT") && delim == BOLD_PRAGMAT_SYMBOL)
+           {
+             *top = u;
+             *ch = &v[1];
+             return true;
+           }
+         else if (is_bold (v, "PR") && delim == STYLE_I_PRAGMAT_SYMBOL)
+           {
+             *top = u;
+             *ch = &v[1];
+             return true;
+           }
+         else
+           {
+             if (whitespace && !ISSPACE (v[0]) && v[0] != NEWLINE_CHAR)
+               {
+                 SCAN_ERROR (true, u, v, "error in pragment");
+               }
+             else if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING && ISUPPER (v[0]))
+               {
+                 /* Skip a bold word as you may trigger on REPR, for
+                    instance.  */
+                 while (ISUPPER (v[0]))
+                   v++;
+               }
+             else if (OPTION_STROPPING (&A68_JOB) == SUPPER_STROPPING && ISLOWER (v[0]))
+               {
+                 /* Skip a tag as you may trigger on expr, for instance.  */
+                 while (ISLOWER (v[0]) || ISDIGIT (v[0]) || v[0] == '_')
+                   v++;
+               }
+             else
+               {
+                 v++;
+               }
+           }
+       }
+
+      FORWARD (u);
+      if (u != NO_LINE)
+       v = &(STRING (u)[0]);
+      else
+       v = NO_TEXT;
+    }
+
+  return false;
+}
+
+/* Return pointer to next token within pragmat.  */
+
+static char *
+get_pragmat_item (LINE_T **top, char **ch)
+{
+  LINE_T *u = *top;
+  char *v = *ch;
+  while (u != NO_LINE)
+    {
+      while (v[0] != NULL_CHAR)
+       {
+         if (!ISSPACE (v[0]) && v[0] != NEWLINE_CHAR)
+           {
+             *top = u;
+             *ch = v;
+             return v;
+           }
+         else
+           {
+             v++;
+           }
+       }
+      FORWARD (u);
+      if (u != NO_LINE)
+       v = &(STRING (u)[0]);
+      else
+       v = NO_TEXT;
+  }
+
+  return NO_TEXT;
+}
+
+/* Scan for the next pragmat and yield the first item within it.  */
+
+static char *
+next_preprocessor_item (LINE_T **top, char **ch, int *delim)
+{
+  LINE_T *u = *top;
+  char *v = *ch;
+  *delim = 0;
+  while (u != NO_LINE)
+    {
+      while (v[0] != NULL_CHAR)
+       {
+         LINE_T *start_l = u;
+         char *start_c = v;
+
+         if (v[0] == QUOTE_CHAR)
+           {
+             /* Skip string denotation.  */
+             SCAN_ERROR (!skip_string (&u, &v), start_l, start_c,
+                         "unterminated string");
+           }
+         else if (a68_find_keyword (A68 (top_keyword), "COMMENT") != NO_KEYWORD
+                  && is_bold (v, "COMMENT"))
+           {
+             /* Skip comment.  */
+             SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_SYMBOL), start_l, start_c,
+                         "unterminated comment");
+           }
+         else if (a68_find_keyword (A68 (top_keyword), "CO") != NO_KEYWORD
+                  && is_bold (v, "CO"))
+           {
+             /* skip comment.  */
+             SCAN_ERROR (!skip_comment (&u, &v, STYLE_I_COMMENT_SYMBOL), start_l, start_c,
+                         "unterminated comment");
+           }
+         else if (a68_find_keyword (A68 (top_keyword), "#") != NO_KEYWORD
+                  && v[0] == '#')
+           {
+             SCAN_ERROR (!skip_comment (&u, &v, STYLE_II_COMMENT_SYMBOL), start_l, start_c,
+                         "unterminated comment");
+           }
+         else if (a68_find_keyword (A68 (top_keyword), "NOTE") != NO_KEYWORD
+                  && is_bold (v, "NOTE"))
+           {
+             SCAN_ERROR (!skip_comment (&u, &v, BOLD_COMMENT_BEGIN_SYMBOL), start_l, start_c,
+                         "unterminated comment");
+           }
+         else if (a68_find_keyword (A68 (top_keyword), "{") != NO_KEYWORD
+                  && v[0] == '{')
+           {
+             SCAN_ERROR (!skip_comment (&u, &v, BRIEF_COMMENT_BEGIN_SYMBOL), start_l, start_c,
+                         "unterminated comment");
+           }
+         else if (is_bold (v, "PRAGMAT") || is_bold (v, "PR"))
+           {
+             /* We caught a PRAGMAT.  */
+             char *item;
+             if (is_bold (v, "PRAGMAT"))
+               {
+                 *delim = BOLD_PRAGMAT_SYMBOL;
+                 v = &v[strlen ("PRAGMAT")];
+               }
+             else if (is_bold (v, "PR"))
+               {
+                 *delim = STYLE_I_PRAGMAT_SYMBOL;
+                 v = &v[strlen ("PR")];
+               }
+             item = get_pragmat_item (&u, &v);
+             SCAN_ERROR (item == NO_TEXT, start_l, start_c,
+                         "unterminated pragmat");
+
+             if (streq (item, "INCLUDE") == 0)
+               {
+                 /* Item "INCLUDE" includes a file.  */
+                 *top = u;
+                 *ch = v;
+                 return item;
+               }
+             else
+               {
+                 /* Unrecognised item - probably options handled later by the
+                    tokeniser.  */
+                 SCAN_ERROR (!skip_pragmat (&u, &v, *delim, false), start_l, start_c,
+                             "unterminated pragmat");
+               }
+           }
+         else if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING && ISUPPER (v[0]))
+           {
+             /* Skip a bold word as you may trigger on REPR, for instance.  */
+             while (ISUPPER (v[0]))
+               v++;
+           }
+         else if (OPTION_STROPPING (&A68_JOB) == SUPPER_STROPPING && ISLOWER (v[0]))
+           {
+             /* Skip a tag as you may trigger on expr, for instance.  */
+             while (ISLOWER (v[0]) || ISDIGIT (v[0]) || v[0] == '_')
+               v++;
+           }
+         else
+           {
+             v++;
+           }
+       }
+
+      FORWARD (u);
+      if (u != NO_LINE)
+       v = &(STRING (u)[0]);
+      else
+       v = NO_TEXT;
+    }
+
+  *top = u;
+  *ch = v;
+  return NO_TEXT;
+}
+
+/* Concatenate the two paths P1 and P2.  */
+
+static char *
+a68_relpath (const char *p1, const char *p2, const char *fn)
+{
+#if defined(__GNU__)
+  /* The Hurd doesn't define PATH_MAX.  */
+# define PATH_MAX 4096
+#endif
+
+  char q[PATH_MAX + 1];
+  a68_bufcpy (q, p1, PATH_MAX);
+  a68_bufcat (q, "/", PATH_MAX);
+  a68_bufcat (q, p2, PATH_MAX);
+  a68_bufcat (q, "/", PATH_MAX);
+  a68_bufcat (q, fn, PATH_MAX);
+  /* Home directory shortcut ~ is a shell extension.  */
+  if (strchr (q, '~') != NO_TEXT) {
+    return NO_TEXT;
+  }
+  char *r = (char *) xmalloc (PATH_MAX + 1);
+  gcc_assert (r != NULL);
+  /*  Error handling in the caller!  */
+  errno = 0;
+  r = lrealpath (q);
+  return r;
+}
+
+/* Return true if we can open the file for reading.  False otherwise.  */
+
+static bool
+file_read_p (const char *filename)
+{
+  return access (filename, R_OK) == 0 ? true : false;
+}
+
+/* Find a file to include into the current source being parsed.  Search the file
+   system for FILENAME and return a string with the file path.  If the file is
+   not found, return NULL.
+
+   When FILENAME is not an absolute path we first try to find it relative to the
+   current file being parsed (CURFILE). Failing to do that we use the search
+   paths provided by the -I option.  */
+
+static char *
+find_include_file (const char *curfile, const char *filename)
+{
+  char *filepath = NO_TEXT;
+  char *tmpfpath = NO_TEXT;
+  char *fnbdir = ldirname (filename);
+  const char *incfile = lbasename (filename);
+
+  if (fnbdir == NULL || incfile == NULL)
+    gcc_unreachable ();
+
+  if (!IS_ABSOLUTE_PATH (filename))
+    {
+      char *sourcedir = ldirname (curfile);
+
+      if (sourcedir == NULL || fnbdir == NULL)
+       gcc_unreachable ();
+
+      if (strlen (sourcedir) == 0 && strlen (fnbdir) == 0)
+       {
+         free (sourcedir);
+         sourcedir = (char *) xmalloc (2);
+         a68_bufcpy (sourcedir, ".", 2);
+       }
+
+      tmpfpath = a68_relpath (sourcedir, fnbdir, incfile);
+      if (file_read_p (tmpfpath))
+       {
+         filepath = tmpfpath;
+         goto cleanup;
+       }
+
+      for (unsigned ix = 0; ix != vec_safe_length (A68_INCLUDE_PATHS); ix++)
+       {
+         const char *include_dir = (*(A68_INCLUDE_PATHS))[ix];
+         tmpfpath = a68_relpath (include_dir, fnbdir, incfile);
+         if (!IS_ABSOLUTE_PATH (tmpfpath))
+           tmpfpath = a68_relpath (sourcedir, fnbdir, incfile);
+         if (file_read_p (tmpfpath))
+           {
+             filepath = tmpfpath;
+             goto cleanup;
+           }
+       }
+
+    cleanup:
+      free (sourcedir);
+      goto end;
+    }
+  else
+    {
+      size_t fnwid = (int) strlen (filename) + 1;
+      tmpfpath = (char *) xmalloc ((size_t) fnwid);
+      a68_bufcpy (tmpfpath, filename, fnwid);
+
+      if (file_read_p (tmpfpath))
+       {
+         filepath = tmpfpath;
+         goto end;
+       }
+    }
+
+end:
+  free (fnbdir);
+  return filepath;
+}
+
+/* Include files.
+   This function handles the INCLUDE pragmat in the source file.  */
+
+static void
+include_files (LINE_T *top)
+{
+  /* syntax: PR include "filename" PR
+
+     The file gets inserted before the line containing the pragmat. In this way
+     correct line numbers are preserved which helps diagnostics. A file that
+     has been included will not be included a second time - it will be ignored.
+     A rigorous fail-safe, but there is no mechanism to prevent recursive
+     includes in A68 source code. User reports do not indicate sophisticated
+     use of INCLUDE, so this is fine for now.
+  */
+
+  bool make_pass = true;
+  while (make_pass)
+    {
+      LINE_T *s, *t, *u = top;
+      char *v = &(STRING (u)[0]);
+      make_pass = false;
+      errno = 0;
+      while (u != NO_LINE)
+       {
+         int pr_lim;
+         char *item = next_preprocessor_item (&u, &v, &pr_lim);
+         LINE_T *start_l = u;
+         char *start_c = v;
+         /* Search for PR include "filename" PR.  */
+         if (item != NO_TEXT && streq (item, "INCLUDE") == 0)
+           {
+             FILE *fp;
+             int fd;
+             size_t fsize, k;
+             int n, linum, bytes_read;
+             char *fbuf, delim;
+             BUFFER fnb;
+             char *fn = NO_TEXT;
+             /* Skip to filename.  */
+             while (ISALPHA (v[0]))
+               v++;
+             while (ISSPACE (v[0]))
+               v++;
+             /* Scan quoted filename.  */
+             SCAN_ERROR ((v[0] != QUOTE_CHAR && v[0] != '\''), start_l, start_c,
+                         "incorrect filename");
+             delim = (v++)[0];
+             n = 0;
+             fnb[0] = NULL_CHAR;
+             /* Scan Algol 68 string (note: "" denotes a ", while in C it
+                concatenates).  */
+             do
+               {
+                 SCAN_ERROR (EOL (v[0]), start_l, start_c,
+                             "incorrect filename");
+                 SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c,
+                             "incorrect filename");
+                 if (v[0] == delim)
+                   {
+                     while (v[0] == delim && v[1] == delim)
+                       {
+                         SCAN_ERROR (n == BUFFER_SIZE - 1, start_l, start_c,
+                                     "incorrect filename");
+                         fnb[n++] = delim;
+                         fnb[n] = NULL_CHAR;
+                         v += 2;
+                       }
+                   }
+                 else if (ISPRINT (v[0]))
+                   {
+                     fnb[n++] = *(v++);
+                     fnb[n] = NULL_CHAR;
+                   }
+                 else
+                   {
+                     SCAN_ERROR (true, start_l, start_c,
+                                 "incorrect filename");
+                   }
+               }
+             while (v[0] != delim);
+
+             /* Insist that the pragmat is closed properly.  */
+             v = &v[1];
+             SCAN_ERROR (!skip_pragmat (&u, &v, pr_lim, true), start_l, start_c,
+                         "unterminated pragmat");
+             SCAN_ERROR (n == 0, start_l, start_c,
+                         "incorrect filename");
+
+             char *sourcefile = NO_TEXT;
+             if (FILENAME (u) != NO_TEXT)
+               {
+                 sourcefile = xstrdup (FILENAME (u));
+               }
+             else
+               {
+                 sourcefile = (char *) xmalloc (2);
+                 a68_bufcpy (sourcefile, ".", 1);
+               }
+             fn = find_include_file (sourcefile, fnb);
+             free (sourcefile);
+
+             /* Do not check errno, since errno may be undefined here
+                after a successful call.  */
+             if (fn != NO_TEXT)
+               a68_bufcpy (fnb, fn, BUFFER_SIZE);
+             else
+               {
+                 SCAN_ERROR (true, start_l, start_c,
+                             "included file not found");
+               }
+             size_t fnwid = (int) strlen (fnb) + 1;
+             fn = (char *) xmalloc ((size_t) fnwid);
+             a68_bufcpy (fn, fnb, fnwid);
+
+             /* Ignore the file when included more than once.  */
+             for (t = top; t != NO_LINE; t = NEXT (t))
+               {
+                 if (strcmp (FILENAME (t), fn) == 0)
+                   goto search_next_pragmat;
+               }
+             t = NO_LINE;
+
+             /* Access the file.  */
+             errno = 0;
+             fp = fopen (fn, "r");
+             SCAN_ERROR (fp == NULL, start_l, start_c,
+                         "error opening included file");
+             fd = fileno (fp);
+             errno = 0;
+             off_t off = lseek (fd, 0, SEEK_END);
+             gcc_assert (off >= 0);
+             fsize = (size_t) off;
+             SCAN_ERROR (errno != 0, start_l, start_c,
+                         "error while reading file");
+             fbuf = (char *) xmalloc (8 + fsize);
+             errno = 0;
+             if (lseek (fd, 0, SEEK_SET) < 0)
+               gcc_unreachable ();
+             SCAN_ERROR (errno != 0, start_l, start_c,
+                         "error while reading file");
+             errno = 0;
+             bytes_read = (int) io_read (fp, fbuf, (size_t) fsize);
+             SCAN_ERROR (errno != 0 || (size_t) bytes_read != fsize, start_l, start_c,
+                         "error while reading file");
+
+             /* Buffer still usable?.  */
+             if (fsize > A68_PARSER (max_scan_buf_length))
+               {
+                 A68_PARSER (max_scan_buf_length) = fsize;
+                 A68_PARSER (scan_buf) = (char *) xmalloc (8 + A68_PARSER (max_scan_buf_length));
+               }
+
+             /* Link all lines into the list.  */
+             linum = 1;
+             s = u;
+             t = PREVIOUS (u);
+             k = 0;
+             if (fsize == 0)
+               {
+                 /* If file is empty, insert single empty line.  */
+                 A68_PARSER (scan_buf)[0] = NEWLINE_CHAR;
+                 A68_PARSER (scan_buf)[1] = NULL_CHAR;
+                 append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
+               }
+             else
+               {
+                 while (k < fsize)
+                   {
+                     n = 0;
+                     A68_PARSER (scan_buf)[0] = NULL_CHAR;
+                     while (k < fsize && fbuf[k] != NEWLINE_CHAR)
+                       {
+                         SCAN_ERROR ((ISCNTRL (fbuf[k]) && !ISSPACE (fbuf[k]))
+                                     || fbuf[k] == STOP_CHAR,
+                                     start_l, start_c,
+                                     "invalid characters in included file");
+                         A68_PARSER (scan_buf)[n++] = fbuf[k++];
+                         A68_PARSER (scan_buf)[n] = NULL_CHAR;
+                       }
+                     A68_PARSER (scan_buf)[n++] = NEWLINE_CHAR;
+                     A68_PARSER (scan_buf)[n] = NULL_CHAR;
+                     if (k < fsize)
+                       k++;
+                     append_source_line (A68_PARSER (scan_buf), &t, &linum, fn);
+                   }
+               }
+
+             /* Conclude and go find another include directive, if any.  */
+             NEXT (t) = s;
+             PREVIOUS (s) = t;
+             concatenate_lines (top);
+             if (fclose (fp) != 0)
+               gcc_unreachable ();
+             make_pass = true;
+           }
+       search_next_pragmat:
+         { (void) 0; };
+       }
+    }
+}
+
+/* Handle a pragment (pragmat or comment).  */
+
+static char *
+pragment (int type, LINE_T **ref_l, char **ref_c)
+{
+#define INIT_BUFFER                                    \
+  do                                                   \
+    {                                                  \
+      chars_in_buf = 0;                                        \
+      A68_PARSER (scan_buf)[chars_in_buf] = '\0';      \
+    }                                                  \
+  while (0)
+
+#define ADD_ONE_CHAR(CH)                                               \
+  do                                                                   \
+    {                                                                  \
+      A68_PARSER (scan_buf)[chars_in_buf ++] = (CH);                   \
+      A68_PARSER (scan_buf)[chars_in_buf] = '\0';                      \
+    }                                                                  \
+  while (0)
+
+  const char *term_s = NO_TEXT;
+  const char *beg_s = NO_TEXT;
+  char c = **ref_c, *start_c = *ref_c;
+  char *z = NO_TEXT;
+  LINE_T *start_l = *ref_l;
+  int beg_s_length, term_s_length, chars_in_buf;
+  bool stop, pragmat = false;
+
+  /* Set terminator to look for.  */
+  if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING)
+    {
+      if (type == STYLE_I_COMMENT_SYMBOL)
+       term_s = "CO";
+      else if (type == STYLE_II_COMMENT_SYMBOL)
+       term_s = "#";
+      else if (type == BOLD_COMMENT_SYMBOL)
+       term_s = "COMMENT";
+      else if (type == BOLD_COMMENT_BEGIN_SYMBOL)
+       {
+         beg_s = "NOTE";
+         term_s = "ETON";
+       }
+      else if (type == BRIEF_COMMENT_BEGIN_SYMBOL)
+       {
+         beg_s = "{";
+         term_s = "}";
+       }
+      else if (type == STYLE_I_PRAGMAT_SYMBOL)
+       {
+         term_s = "PR";
+         pragmat = true;
+       }
+      else if (type == BOLD_PRAGMAT_SYMBOL)
+       {
+         term_s = "PRAGMAT";
+         pragmat = true;
+       }
+    }
+  else
+    {
+      /* SUPPER stropping.  */
+      if (type == STYLE_I_COMMENT_SYMBOL)
+       term_s = "co";
+      else if (type == STYLE_II_COMMENT_SYMBOL)
+       term_s = "#";
+      else if (type == BOLD_COMMENT_SYMBOL)
+       term_s = "comment";
+      else if (type == BOLD_COMMENT_BEGIN_SYMBOL)
+       {
+         beg_s = "note";
+         term_s = "eton";
+       }
+      else if (type == BRIEF_COMMENT_BEGIN_SYMBOL)
+       {
+         beg_s = "{";
+         term_s = "}";
+       }
+      else if (type == STYLE_I_PRAGMAT_SYMBOL)
+       {
+         term_s = "pr";
+         pragmat = true;
+       }
+      else if (type == BOLD_PRAGMAT_SYMBOL)
+       {
+         term_s = "pragmat";
+         pragmat = true;
+       }
+    }
+
+  beg_s_length = (beg_s != NO_TEXT ? (int) strlen (beg_s) : 0);
+  term_s_length = (int) strlen (term_s);
+
+  /* Scan for terminator.  */
+  bool nestable_comment = (beg_s != NO_TEXT);
+  int nesting_level = 1;
+  INIT_BUFFER;
+  stop = false;
+  while (stop == false)
+    {
+      SCAN_ERROR (c == STOP_CHAR, start_l, start_c,
+                 "unterminated pragment");
+
+      /* A ".." or '..' delimited string in a PRAGMAT, or
+        a ".." in a nestable comment.  */
+      if ((pragmat && (c == QUOTE_CHAR || c == '\''))
+         || (nestable_comment && c == QUOTE_CHAR))
+       {
+         char delim = c;
+         bool eos = false;
+         ADD_ONE_CHAR (c);
+         c = next_char (ref_l, ref_c, false);
+         while (!eos)
+           {
+             SCAN_ERROR (EOL (c), start_l, start_c,
+                         "string within pragment exceeds end of line");
+
+             if (c == delim)
+               {
+                 ADD_ONE_CHAR (delim);
+                 save_state (*ref_l, *ref_c, c);
+                 c = next_char (ref_l, ref_c, false);
+                 if (c == delim)
+                   c = next_char (ref_l, ref_c, false);
+                 else
+                   {
+                     restore_state (ref_l, ref_c, &c);
+                     eos = true;
+                   }
+               }
+             else if (ISPRINT (c))
+               {
+                 ADD_ONE_CHAR (c);
+                 c = next_char (ref_l, ref_c, false);
+               }
+             else
+               unworthy (start_l, start_c, c);
+           }
+       }
+      else if (EOL (c))
+       ADD_ONE_CHAR (NEWLINE_CHAR);
+      else if (ISPRINT (c) || ISSPACE (c))
+       ADD_ONE_CHAR (c);
+
+      if (nestable_comment && chars_in_buf >= beg_s_length)
+       {
+         /* If we find another instance of the nestable begin mark, bump the
+            nesting level and continue scanning.  */
+         if (strcmp (beg_s,
+                     &(A68_PARSER (scan_buf)[chars_in_buf - beg_s_length])) == 0)
+           {
+             nesting_level += 1;
+             goto nextchar;
+           }
+       }
+
+      if (chars_in_buf >= term_s_length)
+       {
+         /* Check whether we encountered the terminator.  Mind nesting if
+            necessary.  */
+         if (strcmp (term_s,
+                     &(A68_PARSER (scan_buf)[chars_in_buf - term_s_length])) == 0)
+           {
+             if (nestable_comment)
+               {
+                 gcc_assert (nesting_level > 0);
+                 nesting_level -= 1;
+                 stop = (nesting_level == 0);
+               }
+             else
+               stop = true;
+           }
+       }
+
+    nextchar:
+      c = next_char (ref_l, ref_c, false);
+    }
+
+  A68_PARSER (scan_buf)[chars_in_buf - term_s_length] = '\0';
+  z = a68_new_string (term_s, A68_PARSER (scan_buf), term_s, NO_TEXT);
+  return z;
+#undef ADD_ONE_CHAR
+#undef INIT_BUFFER
+}
+
+/* Whether input shows exponent character.  */
+
+static bool
+is_exp_char (LINE_T **ref_l, char **ref_s, char *ch)
+{
+  bool ret = false;
+
+  char exp_syms[3];
+
+  /* Note that this works for both UPPER and SUPPER stropping regimes.  */
+  exp_syms[0] = EXPONENT_CHAR;
+  exp_syms[1] = TOUPPER (EXPONENT_CHAR);
+  exp_syms[2] = '\0';
+
+  save_state (*ref_l, *ref_s, *ch);
+  if (strchr (exp_syms, *ch) != NO_TEXT)
+    {
+      *ch = next_char (ref_l, ref_s, true);
+      ret = (strchr ("+-0123456789", *ch) != NO_TEXT);
+    }
+  restore_state (ref_l, ref_s, ch);
+  return ret;
+}
+
+/* Whether input shows radix character.  */
+
+static bool
+is_radix_char (LINE_T **ref_l, char **ref_s, char *ch)
+{
+  bool ret = false;
+
+  save_state (*ref_l, *ref_s, *ch);
+  /* Note that this works for both UPPER and SUPPER stropping regimes.  */
+  if (*ch == RADIX_CHAR)
+    {
+      *ch = next_char (ref_l, ref_s, true);
+      ret = (strchr ("0123456789abcdef", *ch) != NO_TEXT);
+    }
+  restore_state (ref_l, ref_s, ch);
+  return ret;
+}
+
+/* Whether input shows decimal point.  */
+
+static bool
+is_decimal_point (LINE_T **ref_l, char **ref_s, char *ch)
+{
+  bool ret = false;
+
+  save_state (*ref_l, *ref_s, *ch);
+  if (*ch == POINT_CHAR)
+    {
+      char exp_syms[3];
+
+      /* Note that this works for both UPPER and SUPPER stropping regimes.  */
+      exp_syms[0] = EXPONENT_CHAR;
+      exp_syms[1] = TOUPPER (EXPONENT_CHAR);
+      exp_syms[2] = '\0';
+
+      *ch = next_char (ref_l, ref_s, true);
+      if (strchr (exp_syms, *ch) != NO_TEXT)
+       {
+         *ch = next_char (ref_l, ref_s, true);
+         ret = (strchr ("+-0123456789", *ch) != NO_TEXT);
+       }
+      else
+       ret = (strchr ("0123456789", *ch) != NO_TEXT);
+    }
+  restore_state (ref_l, ref_s, ch);
+  return ret;
+}
+
+/* Attribute for format item.  */
+
+static enum a68_attribute
+get_format_item (char ch)
+{
+  switch (TOLOWER (ch))
+    {
+    case 'a':
+      return FORMAT_ITEM_A;
+    case 'b':
+      return FORMAT_ITEM_B;
+    case 'c':
+      return FORMAT_ITEM_C;
+    case 'd':
+      return FORMAT_ITEM_D;
+    case 'e':
+      return FORMAT_ITEM_E;
+    case 'f':
+      return FORMAT_ITEM_F;
+    case 'g':
+      return FORMAT_ITEM_G;
+    case 'h':
+      return FORMAT_ITEM_H;
+    case 'i':
+      return FORMAT_ITEM_I;
+    case 'j':
+      return FORMAT_ITEM_J;
+    case 'k':
+      return FORMAT_ITEM_K;
+    case 'l':
+    case '/':
+      return FORMAT_ITEM_L;
+    case 'm':
+      return FORMAT_ITEM_M;
+    case 'n':
+      return FORMAT_ITEM_N;
+    case 'o':
+      return FORMAT_ITEM_O;
+    case 'p':
+      return FORMAT_ITEM_P;
+    case 'q':
+      return FORMAT_ITEM_Q;
+    case 'r':
+      return FORMAT_ITEM_R;
+    case 's':
+      return FORMAT_ITEM_S;
+    case 't':
+      return FORMAT_ITEM_T;
+    case 'u':
+      return FORMAT_ITEM_U;
+    case 'v':
+      return FORMAT_ITEM_V;
+    case 'w':
+      return FORMAT_ITEM_W;
+    case 'x':
+      return FORMAT_ITEM_X;
+    case 'y':
+      return FORMAT_ITEM_Y;
+    case 'z':
+      return FORMAT_ITEM_Z;
+    case '+':
+      return FORMAT_ITEM_PLUS;
+    case '-':
+      return FORMAT_ITEM_MINUS;
+    case POINT_CHAR:
+      return FORMAT_ITEM_POINT;
+    case '%':
+      return FORMAT_ITEM_ESCAPE;
+    default:
+      return STOP;
+    }
+}
+
+/* Get next token from internal copy of source file.
+
+   The kind of token is set via the passed pointer ATTR.
+   The contents of token is set in the scan_buf via SYM.
+
+   The recognized tokens are, by reported ATTR:
+
+   <unset>
+     End of file.
+   FORMAT_ITEM_*
+     Item in a format.
+   STATIC_REPLICATOR
+     INT denotation for a static replicator in a format.
+   BOLD_TAG
+     Bold tag.
+   IDENTIFIER
+     A "lower case" identifier.
+   IDENTIFIER_WITH_UNDERSCORES
+     A "lower case" identifier whose's at least one taggle
+     was found adjacent to an underscore.
+   REAL_DENOTATION
+     A REAL denotation.
+   POINT_SYMBOL
+     .
+   BITS_DENOTATION
+     A BITS denotation like 16rffff
+   INT_DENOTATION
+     An INT denotation.
+   ROW_CHAR_DENOTATION
+     A STRING denotation.
+   LITERAL
+     A literal denotation in a format.
+   STOP
+     Single-character symbols #$()[]{},;@|:
+     := /= :=: :/=:
+     The character is placed in SYM.
+   EQUALS_SYMBOL
+     The equality symbol.
+   OPERATOR
+     A predefined operator.
+*/
+
+static void
+get_next_token (bool in_format,
+               LINE_T **ref_l, char **ref_s,
+               LINE_T **start_l, char **start_c, enum a68_attribute *att)
+{
+  char c = **ref_s;
+  char *sym = A68_PARSER (scan_buf);
+
+  sym[0] = '\0';
+  get_good_char (&c, ref_l, ref_s);
+  *start_l = *ref_l;
+  *start_c = *ref_s;
+  if (c == STOP_CHAR)
+    {
+      /* We are at EOF.  */
+      (sym++)[0] = STOP_CHAR;
+      sym[0] = '\0';
+      return;
+    }
+
+  if (in_format)
+    {
+      /* In a format.  */
+      const char *format_items = "/%\\+-.abcdefghijklmnopqrstuvwxyz";
+      if (strchr (format_items, c) != NO_TEXT)
+       {
+         /* General format items.  */
+         (sym++)[0] = c;
+         sym[0] = NULL_CHAR;
+         *att = get_format_item (c);
+         (void) next_char (ref_l, ref_s, false);
+         return;
+       }
+      if (ISDIGIT (c))
+       {
+         /* INT denotation for static replicator.  */
+         SCAN_DIGITS (c);
+         sym[0] = NULL_CHAR;
+         *att = STATIC_REPLICATOR;
+         return;
+       }
+    }
+
+  if (ISUPPER (c))
+    {
+      /* Bold taggles are enabled only in gnu68.  */
+      bool allow_one_under = !OPTION_STRICT (&A68_JOB);
+
+      if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING)
+       {
+         /* In UPPER stropping a bold tag is an upper case word.  */
+         while (ISUPPER (c))
+           {
+             (sym++)[0] = c;
+             c = next_char (ref_l, ref_s, false, allow_one_under);
+           }
+         sym[0] = '\0';
+         *att = BOLD_TAG;
+       }
+      else
+       {
+         /* In SUPPER stropping a bold tag is a capitalized word that may
+            contain letters and digits.  */
+         while (ISALPHA (c) || ISDIGIT (c))
+           {
+             (sym++)[0] = c;
+             c = next_char (ref_l, ref_s, false, allow_one_under);
+           }
+         sym[0] = '\0';
+         *att = BOLD_TAG;
+       }
+    }
+  else if (ISLOWER (c))
+    {
+      /* In both UPPER and SUPPER stropping regimes a tag is a lower case word
+        which may contain letters and digits.
+
+        In SUPPER stropping, however, it is not allowed to have blanks
+        separating the taggles within tags.  */
+
+      bool allow_one_under = true;
+      bool found_under = false;
+      bool allow_typo = OPTION_STROPPING (&A68_JOB) != SUPPER_STROPPING;
+
+      /* Lower case word - identifier.  */
+      while (ISLOWER (c) || ISDIGIT (c))
+       {
+         (sym++)[0] = c;
+         c = next_char (ref_l, ref_s, allow_typo, allow_one_under,
+                        &found_under);
+       }
+
+      sym[0] = '\0';
+      *att = found_under ? IDENTIFIER_WITH_UNDERSCORES : IDENTIFIER;
+    }
+  else if (c == POINT_CHAR)
+    {
+      /* Begins with a point symbol - point, L REAL denotation.  */
+      if (is_decimal_point (ref_l, ref_s, &c))
+       {
+         (sym++)[0] = '0';
+         (sym++)[0] = POINT_CHAR;
+         c = next_char (ref_l, ref_s, true);
+         SCAN_DIGITS (c);
+         if (is_exp_char (ref_l, ref_s, &c))
+           SCAN_EXPONENT_PART (c);
+         sym[0] = '\0';
+         *att = REAL_DENOTATION;
+       }
+      else
+       {
+         c = next_char (ref_l, ref_s, true);
+         (sym++)[0] = POINT_CHAR;
+         sym[0] = '\0';
+         *att = POINT_SYMBOL;
+       }
+    }
+  else if (ISDIGIT (c))
+    {
+      /* Something that begins with a digit:
+        L INT denotation, L REAL denotation.  */
+      SCAN_DIGITS (c);
+
+      if (is_decimal_point (ref_l, ref_s, &c))
+       {
+         c = next_char (ref_l, ref_s, true);
+         if (is_exp_char (ref_l, ref_s, &c))
+           {
+             (sym++)[0] = POINT_CHAR;
+             (sym++)[0] = '0';
+             SCAN_EXPONENT_PART (c);
+             *att = REAL_DENOTATION;
+           }
+         else
+           {
+             (sym++)[0] = POINT_CHAR;
+             SCAN_DIGITS (c);
+             if (is_exp_char (ref_l, ref_s, &c))
+               SCAN_EXPONENT_PART (c);
+             *att = REAL_DENOTATION;
+           }
+       }
+      else if (is_exp_char (ref_l, ref_s, &c))
+       {
+         SCAN_EXPONENT_PART (c);
+         *att = REAL_DENOTATION;
+       }
+      else if (is_radix_char (ref_l, ref_s, &c))
+       {
+         (sym++)[0] = c;
+         c = next_char (ref_l, ref_s, true);
+         /* This is valid for both UPPER and SUPPER stropping.  */
+         while (ISDIGIT (c) || strchr ("abcdef", c) != NO_TEXT)
+           {
+             (sym++)[0] = c;
+             c = next_char (ref_l, ref_s, true);
+           }
+         *att = BITS_DENOTATION;
+       }
+      else
+       {
+         *att = INT_DENOTATION;
+       }
+      sym[0] = '\0';
+    }
+  else if (c == QUOTE_CHAR)
+    {
+      /* STRING denotation.  */
+      bool stop = false;
+
+      while (!stop)
+       {
+         c = next_char (ref_l, ref_s, false);
+         while (c != QUOTE_CHAR && c != STOP_CHAR)
+           {
+             if (c == APOSTROPHE_CHAR)
+               {
+                 (sym++)[0] = c;
+                 c = next_char (ref_l, ref_s, false);
+                 switch (c)
+                   {
+                   case APOSTROPHE_CHAR:
+                   case 'n':
+                   case 'f':
+                   case 'r':
+                   case 't':
+                     (sym++)[0] = c;
+                     break;
+                   case '(':
+                     {
+                       unsigned int num_code_points = 0;
+
+                       (sym++)[0] = c;
+                       /* Process code points.  */
+                       while (1)
+                         {
+                           /* Skip white spaces.  */
+                           while (1)
+                             {
+                               c = next_char (ref_l, ref_s, false);
+                               if (!ISSPACE (c))
+                                 break;
+                             }
+
+                           /* See if we are done.  */
+                           if (c == ')')
+                             {
+                               SCAN_ERROR (num_code_points == 0, *start_l, *ref_s,
+                                           "expected at least one character point in string break");
+                               (sym++)[0] = c;
+                               break;
+                             }
+                           else if (c == 'u' || c == 'U')
+                             {
+                               (sym++)[0] = c;
+                               /* Process a code point.  */
+                               char u = c;
+                               int numdigits = (u == 'u' ? 4 : 8);
+                               char *startpos = *ref_s;
+                               int i = 0;
+                               do
+                                 {
+                                   c = next_char (ref_l, ref_s, false);
+                                   if (!(ISDIGIT (c)
+                                         || ((c >= 'a') && (c <= 'f'))
+                                         || ((c >= 'A') && (c <= 'F'))))
+                                     {
+                                       SCAN_ERROR (true, *start_l, startpos,
+                                                   (u == 'u'
+                                                    ? "expected four hex digits in \
+string break character point"
+                                                    : "expected eight hex digits in \
+string break character point"));
+                                     }
+                                   (sym++)[0] = c;
+                                   i += 1;
+                                 }
+                               while (i < numdigits);
+
+                               /* Skip white spaces.  */
+                               while (1)
+                                 {
+                                   c = next_char (ref_l, ref_s, false);
+                                   if (!ISSPACE (c))
+                                     break;
+                                 }
+
+                               /* Comma or end of list.  */
+                               if (c == ')')
+                                 {
+                                   (sym++)[0] = c;
+                                   break;
+                                 }
+
+                               SCAN_ERROR (c != ',', *start_l, *ref_s,
+                                           "expected , or ) in string break");
+                             }
+                           else
+                             {
+                               SCAN_ERROR (true, *start_l, *ref_s,
+                                           "unterminated list of character codes");
+                             }
+                         }
+                       break;
+                     }
+                   default:
+                     SCAN_ERROR (true, *start_l, *ref_s, "invalid string break sequence");
+                   }
+               }
+             else
+               {
+                 SCAN_ERROR (EOL (c), *start_l, *start_c, "string exceeds end of line");
+                 (sym++)[0] = c;
+               }
+             c = next_char (ref_l, ref_s, false);
+           }
+         SCAN_ERROR (*ref_l == NO_LINE, *start_l, *start_c, "unterminated string");
+         c = next_char (ref_l, ref_s, false);
+         if (c == QUOTE_CHAR)
+           (sym++)[0] = QUOTE_CHAR;
+         else
+           stop = true;
+       }
+      sym[0] = '\0';
+      *att = (in_format ? LITERAL : ROW_CHAR_DENOTATION);
+    }
+  else if (strchr ("#$()[]{},;@", c) != NO_TEXT)
+    {
+      /* Single character symbols.  */
+      (sym++)[0] = c;
+      (void) next_char (ref_l, ref_s, false);
+      sym[0] = '\0';
+      *att = STOP;
+    }
+  else if (c == '|')
+    {
+      /* Bar.  */
+      (sym++)[0] = c;
+      c = next_char (ref_l, ref_s, false);
+      if (c == ':')
+       {
+         (sym++)[0] = c;
+         (void) next_char (ref_l, ref_s, false);
+       }
+      sym[0] = '\0';
+      *att = STOP;
+    }
+  else if (c == ':')
+    {
+      /* Colon, semicolon, IS, ISNT.  */
+      (sym++)[0] = c;
+      c = next_char (ref_l, ref_s, false);
+      if (c == '=')
+       {
+         (sym++)[0] = c;
+         if ((c = next_char (ref_l, ref_s, false)) == ':')
+           {
+             (sym++)[0] = c;
+             c = next_char (ref_l, ref_s, false);
+           }
+       }
+      else if (c == '/')
+       {
+         (sym++)[0] = c;
+         if ((c = next_char (ref_l, ref_s, false)) == '=')
+           {
+             (sym++)[0] = c;
+             if ((c = next_char (ref_l, ref_s, false)) == ':')
+               {
+                 (sym++)[0] = c;
+                 c = next_char (ref_l, ref_s, false);
+               }
+           }
+       }
+      else if (c == ':')
+       {
+         (sym++)[0] = c;
+         if ((c = next_char (ref_l, ref_s, false)) == '=')
+           (sym++)[0] = c;
+       }
+
+      sym[0] = '\0';
+      *att = STOP;
+
+    }
+  else if (c == '=')
+    {
+      /* Operator starting with "=".  */
+      char *scanned = sym;
+      (sym++)[0] = c;
+      c = next_char (ref_l, ref_s, false);
+      if (strchr (NOMADS, c) != NO_TEXT)
+       {
+         (sym++)[0] = c;
+         c = next_char (ref_l, ref_s, false);
+       }
+      if (c == '=')
+       {
+         (sym++)[0] = c;
+         if (next_char (ref_l, ref_s, false) == ':')
+           {
+             (sym++)[0] = ':';
+             c = next_char (ref_l, ref_s, false);
+             if (strlen (sym) < 4 && c == '=')
+               {
+                 (sym++)[0] = '=';
+                 (void) next_char (ref_l, ref_s, false);
+               }
+           }
+       }
+      else if (c == ':')
+       {
+         (sym++)[0] = c;
+         sym[0] = '\0';
+         if (next_char (ref_l, ref_s, false) == '=')
+           {
+             (sym++)[0] = '=';
+             (void) next_char (ref_l, ref_s, false);
+           }
+         else
+           {
+             SCAN_ERROR (!(strcmp (scanned, "=:") == 0 || strcmp (scanned, "==:") == 0),
+                         *start_l, *start_c, "invalid operator tag");
+           }
+       }
+      sym[0] = '\0';
+      if (strcmp (scanned, "=") == 0)
+       *att = EQUALS_SYMBOL;
+      else
+       *att = OPERATOR;
+    }
+  else if (strchr (MONADS, c) != NO_TEXT || strchr (NOMADS, c) != NO_TEXT)
+    {
+      /* Operator.  */
+      char *scanned = sym;
+      (sym++)[0] = c;
+      c = next_char (ref_l, ref_s, false);
+      if (strchr (NOMADS, c) != NO_TEXT)
+       {
+         (sym++)[0] = c;
+         c = next_char (ref_l, ref_s, false);
+       }
+      if (c == '=')
+       {
+         (sym++)[0] = c;
+         if (next_char (ref_l, ref_s, false) == ':')
+           {
+             (sym++)[0] = ':';
+             c = next_char (ref_l, ref_s, false);
+             if (strlen (scanned) < 4 && c == '=')
+               {
+                 (sym++)[0] = '=';
+                 (void) next_char (ref_l, ref_s, false);
+               }
+           }
+       }
+      else if (c == ':')
+       {
+         (sym++)[0] = c;
+         sym[0] = '\0';
+         if (next_char (ref_l, ref_s, false) == '=')
+           {
+             (sym++)[0] = '=';
+             sym[0] = '\0';
+             (void) next_char (ref_l, ref_s, false);
+           }
+         else
+           {
+             SCAN_ERROR (strcmp (&(scanned[1]), "=:") != 0,
+                         *start_l, *start_c, "invalid operator tag");
+           }
+       }
+      sym[0] = '\0';
+      *att = OPERATOR;
+    }
+  else
+    {
+      /* Afuuus ... strange characters!.  */
+      unworthy (*start_l, *start_c, (int) c);
+    }
+}
+
+/* Whether att opens an embedded clause.  */
+
+static bool
+open_nested_clause (int att)
+{
+  switch (att)
+    {
+    case OPEN_SYMBOL:
+    case BEGIN_SYMBOL:
+    case PAR_SYMBOL:
+    case IF_SYMBOL:
+    case CASE_SYMBOL:
+    case FOR_SYMBOL:
+    case FROM_SYMBOL:
+    case BY_SYMBOL:
+    case TO_SYMBOL:
+    case WHILE_SYMBOL:
+    case DO_SYMBOL:
+    case SUB_SYMBOL:
+      return true;
+    }
+  return false;
+}
+
+/* Whether att closes an embedded clause.  */
+
+static bool
+close_nested_clause (int att)
+{
+  switch (att)
+    {
+    case CLOSE_SYMBOL:
+    case END_SYMBOL:
+    case FI_SYMBOL:
+    case ESAC_SYMBOL:
+    case OD_SYMBOL:
+    case BUS_SYMBOL:
+      return true;
+    }
+  return false;
+}
+
+/* Cast a string to lower case.  */
+
+static void
+make_lower_case (char *p)
+{
+  for (; p != NO_TEXT && p[0] != '\0'; p++)
+    p[0] = TOLOWER (p[0]);
+}
+
+/* Cast a string to upper case.  */
+
+static void
+make_upper_case (char *p)
+{
+  for (; p != NO_TEXT && p[0] != '\0'; p++)
+    p[0] = TOUPPER (p[0]);
+}
+
+/* Construct a linear list of tokens.  */
+
+static void
+tokenise_source (NODE_T **root, int level, bool in_format,
+                LINE_T **l, char **s, LINE_T **start_l,
+                char **start_c)
+{
+  char *pragmat_lpr = NO_TEXT;
+  int pragmat_lprt = 0;
+  LINE_T *pragmat_lprl = NO_LINE;
+  char *pragmat_lprc = NULL;
+
+  char *comment_lpr = NO_TEXT;
+  int comment_lprt = 0;
+  LINE_T *comment_lprl = NO_LINE;
+  char *comment_lprc = NULL;
+
+  while (l != NO_VAR && !A68_PARSER (stop_scanner))
+    {
+      enum a68_attribute att = STOP;
+      get_next_token (in_format, l, s, start_l, start_c, &att);
+
+      if (A68_PARSER (scan_buf)[0] == STOP_CHAR)
+       A68_PARSER (stop_scanner) = true;
+      else if (strlen (A68_PARSER (scan_buf)) > 0 || att == ROW_CHAR_DENOTATION || att == LITERAL)
+       {
+         KEYWORD_T *kw;
+         const char *c = NO_TEXT;
+         bool make_node = true;
+         const char *trailing = NO_TEXT;
+
+         if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING)
+           {
+             /* In UPPER stropping all symbols in R9.4.1 are expressed as bold
+                tags like "BEGIN", or symbols like "@".  */
+
+             /* In this stropping regime there is no need to handle
+                identifiers for which taggles were adjacent to underscores
+                specially.  */
+             if (att != IDENTIFIER && att != IDENTIFIER_WITH_UNDERSCORES)
+               kw = a68_find_keyword (A68 (top_keyword), A68_PARSER (scan_buf));
+             else
+               kw = NO_KEYWORD;
+           }
+         else
+           {
+             /* In SUPPER stropping all symbols in R9.4.1 are expressed as
+                tags like "begin", or symbols like "@".  */
+
+             /* Normalize bold tags to all upper-case letters.  */
+             if (att == BOLD_TAG)
+               make_upper_case (A68_PARSER (scan_buf));
+
+             /* If any of the taggles of the scanned identifier were adjacent
+                to an underscore, that inhibits interpreting it as a
+                keyword.  */
+             if (att != BOLD_TAG && att != IDENTIFIER_WITH_UNDERSCORES)
+               kw = a68_find_keyword (A68 (top_keyword), A68_PARSER (scan_buf));
+             else
+               kw = NO_KEYWORD;
+           }
+
+         /* Beyond this point it is irrelevant whether an identifier had
+            taggles adjacent to an underscore.  */
+         if (att == IDENTIFIER_WITH_UNDERSCORES)
+           att = IDENTIFIER;
+
+         if (kw == NO_KEYWORD || att == ROW_CHAR_DENOTATION)
+           {
+             if (att == IDENTIFIER)
+               make_lower_case (A68_PARSER (scan_buf));
+             if (att != ROW_CHAR_DENOTATION && att != LITERAL)
+               {
+                 size_t len = strlen (A68_PARSER (scan_buf));
+                 while (len >= 1 && A68_PARSER (scan_buf)[len - 1] == '_')
+                   {
+                     trailing = "_";
+                     A68_PARSER (scan_buf)[len - 1] = NULL_CHAR;
+                     len--;
+                   }
+               }
+             c = TEXT (a68_add_token (&A68 (top_token), A68_PARSER (scan_buf)));
+           }
+         else
+           {
+             if (IS (kw, TO_SYMBOL))
+               {
+                 /* Merge GO and TO to GOTO.  */
+                 if (*root != NO_NODE && IS (*root, GO_SYMBOL))
+                   {
+                     ATTRIBUTE (*root) = GOTO_SYMBOL;
+                     NSYMBOL (*root) = TEXT (a68_find_keyword (A68 (top_keyword), "GOTO"));
+                     make_node = false;
+                   }
+                 else
+                   {
+                     att = ATTRIBUTE (kw);
+                     c = TEXT (kw);
+                   }
+               }
+             else
+               {
+                 if (OPTION_STROPPING (&A68_JOB) == UPPER_STROPPING)
+                   {
+                     if (att == 0 || att == BOLD_TAG)
+                       att = ATTRIBUTE (kw);
+                   }
+                 else
+                   {
+                     if (att == 0 || att == IDENTIFIER)
+                       att = ATTRIBUTE (kw);
+                   }
+
+                 c = TEXT (kw);
+                 /* Handle pragments.  */
+                 if (att == STYLE_II_COMMENT_SYMBOL
+                     || att == STYLE_I_COMMENT_SYMBOL
+                     || att == BOLD_COMMENT_SYMBOL
+                     || att == BOLD_COMMENT_BEGIN_SYMBOL
+                     || att == BRIEF_COMMENT_BEGIN_SYMBOL)
+                   {
+                     char *nlpr = pragment (ATTRIBUTE (kw), l, s);
+
+                     if (comment_lpr == NO_TEXT
+                         || (int) strlen (comment_lpr) == 0)
+                       comment_lpr = nlpr;
+                     else
+                       {
+                         char *stale = comment_lpr;
+                         comment_lpr
+                           = a68_new_string (comment_lpr, "n\n", nlpr, NO_TEXT);
+                         free (stale);
+                       }
+                     comment_lprt = att;
+                     comment_lprl = *start_l;
+                     comment_lprc = *start_c;
+                     make_node = false;
+                   }
+                 else if (att == STYLE_I_PRAGMAT_SYMBOL
+                          || att == BOLD_PRAGMAT_SYMBOL)
+                   {
+                     char *nlpr = pragment (ATTRIBUTE (kw), l, s);
+                     if (pragmat_lpr == NO_TEXT
+                         || (int) strlen (pragmat_lpr) == 0)
+                       pragmat_lpr = nlpr;
+                     else
+                       {
+                         char *stale = pragmat_lpr;
+                         pragmat_lpr
+                           = a68_new_string (pragmat_lpr, " ", nlpr, NO_TEXT);
+                         free (stale);
+                       }
+                     pragmat_lprt = att;
+                     pragmat_lprl = *start_l;
+                     pragmat_lprc = *start_c;
+                     if (!A68_PARSER (stop_scanner))
+                       make_node = false;
+                   }
+               }
+           }
+         /* Add token to the tree.  */
+         if (make_node)
+           {
+             NODE_T *q = a68_new_node ();
+             INFO (q) = a68_new_node_info ();
+
+             switch (att)
+               {
+               case ASSIGN_SYMBOL:
+               case END_SYMBOL:
+               case ESAC_SYMBOL:
+               case OD_SYMBOL:
+               case OF_SYMBOL:
+               case FI_SYMBOL:
+               case CLOSE_SYMBOL:
+               case BUS_SYMBOL:
+               case COLON_SYMBOL:
+               case COMMA_SYMBOL:
+               case SEMI_SYMBOL:
+                 GINFO (q) = NO_GINFO;
+                 break;
+               default:
+                 GINFO (q) = a68_new_genie_info ();
+                 break;
+               }
+
+             STATUS (q) = (STATUS_MASK_T) 0;
+             LINE (INFO (q)) = *start_l;
+             CHAR_IN_LINE (INFO (q)) = *start_c;
+             PRIO (INFO (q)) = 0;
+             PROCEDURE_LEVEL (INFO (q)) = 0;
+             ATTRIBUTE (q) = att;
+             NSYMBOL (q) = c;
+             PREVIOUS (q) = *root;
+             SUB (q) = NEXT (q) = NO_NODE;
+             TABLE (q) = NO_TABLE;
+             MOID (q) = NO_MOID;
+             TAX (q) = NO_TAG;
+             if (pragmat_lpr != NO_TEXT)
+               {
+                 NPRAGMAT (q) = pragmat_lpr;
+                 NPRAGMAT_TYPE (q) = pragmat_lprt;
+                 NPRAGMAT_LINE (q) = pragmat_lprl;
+                 NPRAGMAT_CHAR_IN_LINE (q) = pragmat_lprc;
+                 pragmat_lpr = NO_TEXT;
+                 pragmat_lprt = 0;
+               }
+             if (comment_lpr != NO_TEXT)
+               {
+                 NCOMMENT (q) = comment_lpr;
+                 NCOMMENT_TYPE (q) = comment_lprt;
+                 NCOMMENT_LINE (q) = comment_lprl;
+                 NCOMMENT_CHAR_IN_LINE (q) = comment_lprc;
+                 comment_lpr = NO_TEXT;
+                 comment_lprt = 0;
+               }
+             if (*root != NO_NODE)
+               NEXT (*root) = q;
+             if (TOP_NODE (&A68_JOB) == NO_NODE)
+               TOP_NODE (&A68_JOB) = q;
+             *root = q;
+             if (trailing != NO_TEXT)
+               a68_warning (q, 0,
+                            "ignoring trailing character H in A",
+                            trailing, att);
+           }
+         /* Redirection in tokenising formats. The scanner is a recursive-descent type as
+            to know when it scans a format text and when not.  */
+         if (in_format && att == FORMAT_DELIMITER_SYMBOL)
+           return;
+         else if (!in_format && att == FORMAT_DELIMITER_SYMBOL)
+           tokenise_source (root, level + 1, true, l, s, start_l, start_c);
+         else if (in_format && open_nested_clause (att))
+           {
+             NODE_T *z = PREVIOUS (*root);
+
+             if (z != NO_NODE && a68_is_one_of (z, FORMAT_ITEM_N, FORMAT_ITEM_G, FORMAT_ITEM_H,
+                                                FORMAT_ITEM_F, STOP))
+               {
+                 tokenise_source (root, level, false, l, s, start_l, start_c);
+               }
+             else if (att == OPEN_SYMBOL)
+               ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
+             else if (OPTION_BRACKETS (&A68_JOB) && att == SUB_SYMBOL)
+               ATTRIBUTE (*root) = FORMAT_OPEN_SYMBOL;
+           }
+         else if (!in_format && level > 0 && open_nested_clause (att))
+           tokenise_source (root, level + 1, false, l, s, start_l, start_c);
+         else if (!in_format && level > 0 && close_nested_clause (att))
+           return;
+         else if (in_format && att == CLOSE_SYMBOL)
+           ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
+         else if (OPTION_BRACKETS (&A68_JOB) && in_format && att == BUS_SYMBOL)
+           ATTRIBUTE (*root) = FORMAT_CLOSE_SYMBOL;
+       }
+    }
+}
+
+/* Tokenise source file, build initial syntax tree.  */
+
+bool
+a68_lexical_analyser (const char *filename)
+{
+  LINE_T *l = NO_LINE, *start_l = NO_LINE;
+  char *s = NO_TEXT, *start_c = NO_TEXT;
+  NODE_T *root = NO_NODE;
+
+  /* Read the source file into lines.  */
+  if (!read_source_file (filename))
+    return false;
+
+  /* Start tokenising.  */
+  A68_PARSER (stop_scanner) = false;
+  if ((l = TOP_LINE (&A68_JOB)) != NO_LINE)
+    s = STRING (l);
+  tokenise_source (&root, 0, false, &l, &s, &start_l, &start_c);
+
+  /* If the source is a prelude packet then we should remove the prelude and
+     postlude nodes from the token stream.  We distinguish these nodes by
+     location.
+
+     Yes this is crude and creepy but it works and it is less annoying than not
+     adding the prelude/postlude in read_source_file and I got other fish to
+     fry at this moment.  Somebody please fix this in a decent way, thanks -
+     jemarch.  */
+
+  NODE_T *p = TOP_NODE (&A68_JOB);
+  for (; p != NO_NODE; FORWARD (p))
+    {
+      LINE_T *l = LINE (INFO (p));
+      if (strcmp (FILENAME (l), "prelude") != 0)
+       break;
+    }
+
+  if (p != NO_NODE && IS (p, MODULE_SYMBOL))
+    {
+      p = TOP_NODE (&A68_JOB);
+      while (p != NO_NODE)
+       {
+         LINE_T *l = LINE (INFO (p));
+         if (strcmp (FILENAME (l), "prelude") == 0
+             || strcmp (FILENAME (l), "postlude") == 0)
+           {
+             if (PREVIOUS (p) != NO_NODE)
+               NEXT (PREVIOUS (p)) = NEXT (p);
+             else
+               TOP_NODE (&A68_JOB) = NEXT (p);
+
+             if (NEXT (p) != NO_NODE)
+               PREVIOUS (NEXT (p)) = PREVIOUS (p);
+
+             NODE_T *next = NEXT (p);
+             p = next;
+           }
+         else
+           p = FORWARD (p);
+       }
+    }
+
+  /* Note that A68_PARSER (scan_buf) and A68_PARSER (max_scan_buf_length) are
+     allocated by read_source_line.  */
+  free (A68_PARSER (scan_buf));
+  A68_PARSER (scan_buf) = NULL;
+  A68_PARSER (max_scan_buf_length) = 0;
+  return true;
+}
author	Jose E. Marchesi <jose.marchesi@oracle.com>
	Sat, 11 Oct 2025 17:46:56 +0000 (19:46 +0200)
committer	Jose E. Marchesi <jose.marchesi@oracle.com>
	Sun, 30 Nov 2025 00:52:08 +0000 (01:52 +0100)