]> git.ipfire.org Git - thirdparty/json-c.git/commitdiff
optimizations to json_tokener_parse_ex(), printbuf_memappend()
authorMichael Clark <michael@metaparadigm.com>
Mon, 27 Apr 2009 08:16:58 +0000 (08:16 +0000)
committerMichael Clark <michael@metaparadigm.com>
Mon, 27 Apr 2009 08:16:58 +0000 (08:16 +0000)
  -- Brent Miller, bdmiller at yahoo dash inc dot com

git-svn-id: http://svn.metaparadigm.com/svn/json-c/trunk@34 327403b1-1117-474d-bef2-5cb71233fd97

ChangeLog
json_tokener.c
printbuf.c
printbuf.h

index 4cdd9a0493967a10887a1003f4baf5bab23bce5d..b07ea57a724c47380f2a3d4514a1267b5f8acede 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,6 @@
 0.9
+  * optimizations to json_tokener_parse_ex(), printbuf_memappend()
+    Brent Miller, bdmiller at yahoo dash inc dot com
   * Don't use this as a variable, so we can compile with a C++ compiler
   * Add casts from void* to type of assignment when using malloc 
   * Add #ifdef __cplusplus guards to all of the headers
index beaa956c669a9cdb04b8d0f5cfcc33c7e115df91..8e8c6d9de3db315f240c1dee79ff62a52739b8a2 100644 (file)
@@ -7,6 +7,10 @@
  * This library is free software; you can redistribute it and/or modify
  * it under the terms of the MIT license. See COPYING for details.
  *
+ *
+ * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
+ * The copyrights to the contents of this file are licensed under the MIT License
+ * (http://www.opensource.org/licenses/mit-license.php)
  */
 
 #include "config.h"
@@ -135,35 +139,68 @@ char* strndup(const char* str, size_t n)
 #define current tok->stack[tok->depth].current
 #define obj_field_name tok->stack[tok->depth].obj_field_name
 
+/* Optimization:
+ * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
+ * iterating character-by character.  A large performance boost is
+ * achieved by using tighter loops to locally handle units such as
+ * comments and strings.  Loops that handle an entire token within 
+ * their scope also gather entire strings and pass them to 
+ * printbuf_memappend() in a single call, rather than calling
+ * printbuf_memappend() one char at a time.
+ *
+ * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
+ * common to both the main loop and the tighter loops.
+ */
+
+/* POP_CHAR(dest, tok) macro:
+ *   Not really a pop()...peeks at the current char and stores it in dest.
+ *   Returns 1 on success, sets tok->err and returns 0 if no more chars.
+ *   Implicit inputs:  str, len vars
+ */
+#define POP_CHAR(dest, tok)                                                  \
+  (((tok)->char_offset == len) ?                                          \
+   (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
+    (((tok)->err = json_tokener_success), 0)                              \
+    :                                                                   \
+    (((tok)->err = json_tokener_continue), 0)                             \
+    ) :                                                                 \
+   (((dest) = *str), 1)                                                 \
+   )
+/* ADVANCE_CHAR() macro:
+ *   Incrementes str & tok->char_offset.
+ *   For convenience of existing conditionals, returns the old value of c (0 on eof)
+ *   Implicit inputs:  c var
+ */
+#define ADVANCE_CHAR(str, tok) \
+  ( ++(str), ((tok)->char_offset)++, c)
+
+/* End optimization macro defs */
+
+
 struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
                                          char *str, int len)
 {
   struct json_object *obj = NULL;
-  char c;
+  char c = '\1';
 
   tok->char_offset = 0;
   tok->err = json_tokener_success;
 
-  do {
-    if(tok->char_offset == len) {
-      if(tok->depth == 0 && state == json_tokener_state_eatws &&
-        saved_state == json_tokener_state_finish)
-       tok->err = json_tokener_success;
-      else
-       tok->err = json_tokener_continue;
-      goto out;
-    }
+  while (POP_CHAR(c, tok)) {
 
-    c = *str;
   redo_char:
     switch(state) {
 
     case json_tokener_state_eatws:
-      if(isspace(c)) {
-       /* okay */
-      } else if(c == '/') {
+      /* Advance until we change state */
+      while (isspace(c)) {
+       if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
+         goto out;
+      }
+      if(c == '/') {
        printbuf_reset(tok->pb);
-       printbuf_memappend(tok->pb, &c, 1);
+       printbuf_memappend_fast(tok->pb, &c, 1);
        state = json_tokener_state_comment_start;
       } else {
        state = saved_state;
@@ -236,7 +273,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
       goto redo_char;
 
     case json_tokener_state_null:
-      printbuf_memappend(tok->pb, &c, 1);
+      printbuf_memappend_fast(tok->pb, &c, 1);
       if(strncasecmp(json_null_str, tok->pb->buf,
                     min(tok->st_pos+1, strlen(json_null_str))) == 0) {
        if(tok->st_pos == strlen(json_null_str)) {
@@ -261,25 +298,42 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
        tok->err = json_tokener_error_parse_comment;
        goto out;
       }
-      printbuf_memappend(tok->pb, &c, 1);
+      printbuf_memappend_fast(tok->pb, &c, 1);
       break;
 
     case json_tokener_state_comment:
-      if(c == '*') state = json_tokener_state_comment_end;
-      printbuf_memappend(tok->pb, &c, 1);
-      break;
+              {
+          /* Advance until we change state */
+          char *case_start = str;
+          while(c != '*') {
+            if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
+              printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+              goto out;
+            } 
+          }
+          printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
+          state = json_tokener_state_comment_end;
+        }
+            break;
 
     case json_tokener_state_comment_eol:
-      if(c == '\n') {
+      {
+       /* Advance until we change state */
+       char *case_start = str;
+       while(c != '\n') {
+         if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
+           printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+           goto out;
+         }
+       }
+       printbuf_memappend_fast(tok->pb, case_start, str-case_start);
        MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
        state = json_tokener_state_eatws;
-      } else {
-       printbuf_memappend(tok->pb, &c, 1);
       }
       break;
 
     case json_tokener_state_comment_end:
-      printbuf_memappend(tok->pb, &c, 1);
+      printbuf_memappend_fast(tok->pb, &c, 1);
       if(c == '/') {
        MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
        state = json_tokener_state_eatws;
@@ -289,15 +343,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
       break;
 
     case json_tokener_state_string:
-      if(c == tok->quote_char) {
-       current = json_object_new_string(tok->pb->buf);
-       saved_state = json_tokener_state_finish;
-       state = json_tokener_state_eatws;
-      } else if(c == '\\') {
-       saved_state = json_tokener_state_string;
-       state = json_tokener_state_string_escape;
-      } else {
-       printbuf_memappend(tok->pb, &c, 1);
+      {
+       /* Advance until we change state */
+       char *case_start = str;
+       while(1) {
+         if(c == tok->quote_char) {
+           printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+           current = json_object_new_string(tok->pb->buf);
+           saved_state = json_tokener_state_finish;
+           state = json_tokener_state_eatws;
+           break;
+         } else if(c == '\\') {
+           printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+           saved_state = json_tokener_state_string;
+           state = json_tokener_state_string_escape;
+           break;
+         }
+         if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
+           printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+           goto out;
+         }
+       }
       }
       break;
 
@@ -306,17 +372,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
       case '"':
       case '\\':
       case '/':
-       printbuf_memappend(tok->pb, &c, 1);
+       printbuf_memappend_fast(tok->pb, &c, 1);
        state = saved_state;
        break;
       case 'b':
       case 'n':
       case 'r':
       case 't':
-       if(c == 'b') printbuf_memappend(tok->pb, "\b", 1);
-       else if(c == 'n') printbuf_memappend(tok->pb, "\n", 1);
-       else if(c == 'r') printbuf_memappend(tok->pb, "\r", 1);
-       else if(c == 't') printbuf_memappend(tok->pb, "\t", 1);
+       if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
+       else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
+       else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
+       else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
        state = saved_state;
        break;
       case 'u':
@@ -331,33 +397,46 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
       break;
 
     case json_tokener_state_escape_unicode:
-      if(strchr(json_hex_chars, c)) {
-       tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
-       if(tok->st_pos == 4) {
-         unsigned char utf_out[3];
-         if (tok->ucs_char < 0x80) {
-           utf_out[0] = tok->ucs_char;
-           printbuf_memappend(tok->pb, (char*)utf_out, 1);
-         } else if (tok->ucs_char < 0x800) {
-           utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
-           utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
-           printbuf_memappend(tok->pb, (char*)utf_out, 2);
-         } else {
-           utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
-           utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
-           utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
-           printbuf_memappend(tok->pb, (char*)utf_out, 3);
-         }
-         state = saved_state;
+            /* Note that the following code is inefficient for handling large
+       * chunks of extended chars, calling printbuf_memappend() once
+       * for each multi-byte character of input.
+       * This is a good area for future optimization.
+       */
+       {
+         /* Advance until we change state */
+         while(1) {
+           if(strchr(json_hex_chars, c)) {
+             tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
+             if(tok->st_pos == 4) {
+               unsigned char utf_out[3];
+               if (tok->ucs_char < 0x80) {
+                 utf_out[0] = tok->ucs_char;
+                 printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
+               } else if (tok->ucs_char < 0x800) {
+                 utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
+                 utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
+                 printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
+               } else {
+                 utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
+                 utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
+                 utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
+                 printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
+               }
+               state = saved_state;
+               break;
+             }
+           } else {
+             tok->err = json_tokener_error_parse_string;
+             goto out;
+                 }
+         if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
+           goto out;
        }
-      } else {
-       tok->err = json_tokener_error_parse_string;
-       goto out;
       }
       break;
 
     case json_tokener_state_boolean:
-      printbuf_memappend(tok->pb, &c, 1);
+      printbuf_memappend_fast(tok->pb, &c, 1);
       if(strncasecmp(json_true_str, tok->pb->buf,
                     min(tok->st_pos+1, strlen(json_true_str))) == 0) {
        if(tok->st_pos == strlen(json_true_str)) {
@@ -382,23 +461,35 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
       break;
 
     case json_tokener_state_number:
-      if(c && strchr(json_number_chars, c)) {
-       printbuf_memappend(tok->pb, &c, 1);     
-       if(c == '.' || c == 'e' || c == 'E') tok->is_double = 1;
-      } else {
-       int numi;
-       double numd;
-       if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
-         current = json_object_new_int(numi);
-       } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
-         current = json_object_new_double(numd);
-       } else {
-         tok->err = json_tokener_error_parse_number;
-         goto out;
+      {
+       /* Advance until we change state */
+       char *case_start = str;
+       int case_len=0;
+       while(c && strchr(json_number_chars, c)) {
+         ++case_len;
+         if(c == '.' || c == 'e') tok->is_double = 1;
+         if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
+           printbuf_memappend_fast(tok->pb, case_start, case_len);
+           goto out;
+         }
        }
-       saved_state = json_tokener_state_finish;
-       state = json_tokener_state_eatws;
-       goto redo_char;
+        if (case_len>0)
+          printbuf_memappend_fast(tok->pb, case_start, case_len);
+      }
+      {
+        int numi;
+        double numd;
+        if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
+          current = json_object_new_int(numi);
+        } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
+          current = json_object_new_double(numd);
+        } else {
+          tok->err = json_tokener_error_parse_number;
+          goto out;
+        }
+        saved_state = json_tokener_state_finish;
+        state = json_tokener_state_eatws;
+        goto redo_char;
       }
       break;
 
@@ -452,15 +543,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
       break;
 
     case json_tokener_state_object_field:
-      if(c == tok->quote_char) {
-       obj_field_name = strdup(tok->pb->buf);
-       saved_state = json_tokener_state_object_field_end;
-       state = json_tokener_state_eatws;
-      } else if(c == '\\') {
-       saved_state = json_tokener_state_object_field;
-       state = json_tokener_state_string_escape;
-      } else {
-       printbuf_memappend(tok->pb, &c, 1);
+      {
+       /* Advance until we change state */
+       char *case_start = str;
+       while(1) {
+         if(c == tok->quote_char) {
+           printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+           obj_field_name = strdup(tok->pb->buf);
+           saved_state = json_tokener_state_object_field_end;
+           state = json_tokener_state_eatws;
+           break;
+         } else if(c == '\\') {
+           printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+           saved_state = json_tokener_state_object_field;
+           state = json_tokener_state_string_escape;
+           break;
+         }
+         if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
+           printbuf_memappend_fast(tok->pb, case_start, str-case_start);
+           goto out;
+         }
+       }
       }
       break;
 
@@ -506,15 +609,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
       break;
 
     }
-    str++;
-    tok->char_offset++;
-  } while(c);
-
-  if(state != json_tokener_state_finish &&
-     saved_state != json_tokener_state_finish)
-    tok->err = json_tokener_error_parse_eof;
+    if (!ADVANCE_CHAR(str, tok))
+      goto out;
+  } /* while(POP_CHAR) */
 
  out:
+  if (!c) { /* We hit an eof char (0) */
+    if(state != json_tokener_state_finish &&
+       saved_state != json_tokener_state_finish)
+      tok->err = json_tokener_error_parse_eof;
+  }
+
   if(tok->err == json_tokener_success) return json_object_get(current);
   MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
           json_tokener_errors[tok->err], tok->char_offset);
index a2182f48e78dd48aca231586c1987753a1c7857a..a9c711cebad0c4eecb8eceffbbc18f4032445c65 100644 (file)
@@ -7,6 +7,10 @@
  * This library is free software; you can redistribute it and/or modify
  * it under the terms of the MIT license. See COPYING for details.
  *
+ *
+ * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
+ * The copyrights to the contents of this file are licensed under the MIT License
+ * (http://www.opensource.org/licenses/mit-license.php)
  */
 
 #include "config.h"
@@ -118,16 +122,15 @@ int sprintbuf(struct printbuf *p, const char *msg, ...)
      if output is truncated whereas some return the number of bytes that
      would have been writen - this code handles both cases. */
   if(size == -1 || size > 127) {
-    int ret;
     va_start(ap, msg);
-    size = vasprintf(&t, msg, ap);
+    if((size = vasprintf(&t, msg, ap)) == -1) return -1;
     va_end(ap);
-    if(size == -1) return -1;
-    ret = printbuf_memappend(p, t, size);
+    printbuf_memappend(p, t, size);
     free(t);
-    return ret;
+    return size;
   } else {
-    return printbuf_memappend(p, buf, size);
+    printbuf_memappend(p, buf, size);
+    return size;
   }
 }
 
index 20c81cb7af535a73fb59f4bc2e66de7f502e974a..53fa9216acf975a33098bb95683e4b4334284ba4 100644 (file)
@@ -7,6 +7,10 @@
  * This library is free software; you can redistribute it and/or modify
  * it under the terms of the MIT license. See COPYING for details.
  *
+ *
+ * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
+ * The copyrights to the contents of this file are licensed under the MIT License
+ * (http://www.opensource.org/licenses/mit-license.php)
  */
 
 #ifndef _printbuf_h_
@@ -27,9 +31,23 @@ struct printbuf {
 extern struct printbuf*
 printbuf_new(void);
 
+/* As an optimization, printbuf_memappend is defined as a macro that
+ * handles copying data if the buffer is large enough; otherwise it
+ * invokes printbuf_memappend_real() which performs the heavy lifting
+ * of realloc()ing the buffer and copying data.
+ */
 extern int
 printbuf_memappend(struct printbuf *p, const char *buf, int size);
 
+#define printbuf_memappend_fast(p, bufptr, bufsize)          \
+do {                                                         \
+  if ((p->size - p->bpos) > bufsize) {                       \
+    memcpy(p->buf + p->bpos, (bufptr), bufsize);             \
+    p->bpos += bufsize;                                      \
+    p->buf[p->bpos]= '\0';                                   \
+  } else {  printbuf_memappend(p, (bufptr), bufsize); }      \
+} while (0)
+
 extern int
 sprintbuf(struct printbuf *p, const char *msg, ...);