parser: switch to using char* pointers

author Justin Viiret <justin.viiret@intel.com>

Thu, 2 Feb 2017 02:56:30 +0000 (13:56 +1100)

committer Matthew Barr <matthew.barr@intel.com>

Wed, 26 Apr 2017 04:57:53 +0000 (14:57 +1000)
author Justin Viiret <justin.viiret@intel.com>
Thu, 2 Feb 2017 02:56:30 +0000 (13:56 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 26 Apr 2017 04:57:53 +0000 (14:57 +1000)
diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl

index dfa0bedad730b82d76b875ac1522ea33692156d5..9cddace4216036e10351ad991af5a8ba8108b02b 100644 (file)
--- a/src/parser/Parser.rl
+++ b/src/parser/Parser.rl
@@ -116,7 +116,7 @@ unsigned parseAsDecimal(unsigned oct) {
  static constexpr u32 MAX_NUMBER = INT_MAX;
  
  static
-void pushDec(u32 *acc, u8 raw_digit) {
+void pushDec(u32 *acc, char raw_digit) {
      assert(raw_digit >= '0' && raw_digit <= '9');
      u32 digit_val = raw_digit - '0';
  
@@ -130,7 +130,7 @@ void pushDec(u32 *acc, u8 raw_digit) {
  }
  
  static
-void pushOct(u32 *acc, u8 raw_digit) {
+void pushOct(u32 *acc, char raw_digit) {
      assert(raw_digit >= '0' && raw_digit <= '7');
      u32 digit_val = raw_digit - '0';
  
@@ -169,8 +169,7 @@ ComponentSequence *enterSequence(ComponentSequence *parent,
  }
  
  static
-void addLiteral(ComponentSequence *currentSeq, unsigned char c,
-                const ParseMode &mode) {
+void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) {
      if (mode.utf8 && mode.caseless) {
          /* leverage ComponentClass to generate the vertices */
          auto cc = getComponentClass(mode);
@@ -197,7 +196,7 @@ void addEscaped(ComponentSequence *currentSeq, unichar accum,
          if (accum > 255) {
              throw LocatedParseError(err_msg);
          }
-        addLiteral(currentSeq, (unsigned char)accum, mode);
+        addLiteral(currentSeq, (char)accum, mode);
      }
  }
  
@@ -217,7 +216,7 @@ void addEscapedHex(ComponentSequence *currentSeq, unichar accum,
  #define SLASH_C_ERROR "\\c must be followed by an ASCII character"
  
  static
-u8 decodeCtrl(u8 raw) {
+u8 decodeCtrl(char raw) {
      if (raw & 0x80) {
          throw LocatedParseError(SLASH_C_ERROR);
      }
@@ -225,10 +224,10 @@ u8 decodeCtrl(u8 raw) {
  }
  
  static
-unichar readUtf8CodePoint2c(const u8 *ts) {
+unichar readUtf8CodePoint2c(const char *s) {
+    auto *ts = (const u8 *)s;
      assert(ts[0] >= 0xc0 && ts[0] < 0xe0);
      assert(ts[1] >= 0x80 && ts[1] < 0xc0);
-
      unichar val = ts[0] & 0x1f;
      val <<= 6;
      val |= ts[1] & 0x3f;
@@ -238,7 +237,8 @@ unichar readUtf8CodePoint2c(const u8 *ts) {
  }
  
  static
-unichar readUtf8CodePoint3c(const u8 *ts) {
+unichar readUtf8CodePoint3c(const char *s) {
+    auto *ts = (const u8 *)s;
      assert(ts[0] >= 0xe0 && ts[0] < 0xf0);
      assert(ts[1] >= 0x80 && ts[1] < 0xc0);
      assert(ts[2] >= 0x80 && ts[2] < 0xc0);
@@ -253,7 +253,8 @@ unichar readUtf8CodePoint3c(const u8 *ts) {
  }
  
  static
-unichar readUtf8CodePoint4c(const u8 *ts) {
+unichar readUtf8CodePoint4c(const char *s) {
+    auto *ts = (const u8 *)s;
      assert(ts[0] >= 0xf0 && ts[0] < 0xf8);
      assert(ts[1] >= 0x80 && ts[1] < 0xc0);
      assert(ts[2] >= 0x80 && ts[2] < 0xc0);
@@ -273,12 +274,10 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
  %%{
      machine regex;
  
-    alphtype unsigned char;
-
      action throwUnsupportedEscape {
          ostringstream str;
-        str << "'\\" << (char)*(ts + 1) << "' at index "
-            << ts - ptr << " not supported in a character class.";
+        str << "'\\" << *(ts + 1) << "' at index " << ts - ptr
+            << " not supported in a character class.";
          throw ParseError(str.str());
      }
      action unsupportedProperty {
@@ -974,7 +973,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
                };
  
                '\\o{' [0-7]+ '}' => {
-                  string oct((const char *)ts + 3, te - ts - 4);
+                  string oct(ts + 3, te - ts - 4);
                    long int val = strtol(oct.c_str(), nullptr, 8);
                    if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) {
                        throw LocatedParseError("Value in \\o{...} sequence is too large");
@@ -999,7 +998,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
                };
                # Unicode Hex
                '\\x{' xdigit+ '}' => {
-                  string hex((const char *)ts + 3, te - ts - 4);
+                  string hex(ts + 3, te - ts - 4);
                    long int val = strtol(hex.c_str(), nullptr, 16);
                    if (val > MAX_UNICODE) {
                        throw LocatedParseError("Value in \\x{...} sequence is too large");
@@ -1089,7 +1088,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
  
                # Literal character
                (any - ']') => {
-                  currentCls->add(*ts);
+                  currentCls->add((u8)*ts);
                };
  
                ']' => {
@@ -1443,7 +1442,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
                        // Otherwise, we interpret the first three digits as an
                        // octal escape, and the remaining characters stand for
                        // themselves as literals.
-                      const u8 *s = ts;
+                      const char *s = ts;
                        unsigned int accum = 0;
                        unsigned int oct_digits = 0;
                        assert(*s == '\\'); // token starts at backslash
@@ -1488,7 +1487,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
                    throw LocatedParseError("Invalid reference after \\g");
                };
                '\\o{' [0-7]+ '}' => {
-                  string oct((const char *)ts + 3, te - ts - 4);
+                  string oct(ts + 3, te - ts - 4);
                    long int val = strtol(oct.c_str(), nullptr, 8);
                    if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) {
                        throw LocatedParseError("Value in \\o{...} sequence is too large");
@@ -1505,7 +1504,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
                };
                # Unicode Hex
                '\\x{' xdigit+ '}' => {
-                  string hex((const char *)ts + 3, te - ts - 4);
+                  string hex(ts + 3, te - ts - 4);
                    long int val = strtol(hex.c_str(), nullptr, 16);
                    if (val > MAX_UNICODE) {
                        throw LocatedParseError("Value in \\x{...} sequence is too large");
@@ -1529,8 +1528,8 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
                # A bunch of unsupported (for now) escapes
                escapedUnsupported => {
                    ostringstream str;
-                  str << "'\\" << (char)*(ts + 1) << "' at index "
-                      << ts - ptr << " not supported.";
+                  str << "'\\" << *(ts + 1) << "' at index " << ts - ptr
+                      << " not supported.";
                    throw ParseError(str.str());
                };
  
@@ -1831,24 +1830,22 @@ unichar readUtf8CodePoint4c(const u8 *ts) {
  %% write data nofinal;
  
  /** \brief Main parser call, returns root Component or nullptr. */
-unique_ptr<Component> parse(const char *c_ptr, ParseMode &globalMode) {
-    assert(c_ptr);
+unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) {
+    assert(ptr);
  
-    const u8 *ptr = (const u8 *const)c_ptr;
-    const u8 *p = ptr;
-    const u8 *pe = ptr + strlen(c_ptr);
+    const char *p = ptr;
+    const char *pe = ptr + strlen(ptr);
  
      // First, read the control verbs, set any global mode flags and move the
      // ptr forward.
-    p = (const u8 *)read_control_verbs((const char *)p, (const char *)pe,
-                                       globalMode);
+    p = read_control_verbs(p, pe, globalMode);
  
-    const u8 *eof = pe;
+    const char *eof = pe;
      int cs;
      UNUSED int act;
      int top;
      vector<int> stack;
-    const u8 *ts, *te;
+    const char *ts, *te;
      unichar accumulator = 0;
      unichar octAccumulator = 0; /* required as we are also accumulating for
                                   * back ref when looking for octals */
@@ -1894,7 +1891,7 @@ unique_ptr<Component> parse(const char *c_ptr, ParseMode &globalMode) {
      bool inCharClassEarly = false;
  
      // Location at which the current character class began.
-    const u8 *currentClsBegin = p;
+    const char *currentClsBegin = p;
  
      // We throw exceptions on various parsing failures beyond this point: we
      // use a try/catch block here to clean up our allocated memory before we
author	Justin Viiret <justin.viiret@intel.com>
	Thu, 2 Feb 2017 02:56:30 +0000 (13:56 +1100)
committer	Matthew Barr <matthew.barr@intel.com>
	Wed, 26 Apr 2017 04:57:53 +0000 (14:57 +1000)