From: drh <> Date: Tue, 28 Nov 2023 20:25:23 +0000 (+0000) Subject: Attempt to get json_extract() working with pure JSONB only, and without X-Git-Tag: version-3.45.0~116^2~47^2~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1cab41e2906130ec64af0381d5a31f017133b248;p=thirdparty%2Fsqlite.git Attempt to get json_extract() working with pure JSONB only, and without the use of JsonNode. Mostly working, but there are some differences from legacy in corner cases. FossilOrigin-Name: 8c324af1eca27e86adc45622af4f3b06a67a3f968596ac58aa7434b1f6f05f3c --- diff --git a/manifest b/manifest index 510a2234e3..8d932ccf28 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C The\sjson_remove()\sfunction\snow\suses\sonly\sJSONB,\snever\sJsonNodes,\sinternally. -D 2023-11-28T18:16:02.028 +C Attempt\sto\sget\sjson_extract()\sworking\swith\spure\sJSONB\sonly,\sand\swithout\nthe\suse\sof\sJsonNode.\s\sMostly\sworking,\sbut\sthere\sare\ssome\sdifferences\sfrom\nlegacy\sin\scorner\scases. +D 2023-11-28T20:25:23.336 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -688,7 +688,7 @@ F src/hash.h 3340ab6e1d13e725571d7cee6d3e3135f0779a7d8e76a9ce0a85971fa3953c51 F src/hwtime.h f9c2dfb84dce7acf95ce6d289e46f5f9d3d1afd328e53da8f8e9008e3b3caae6 F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71 F src/insert.c 3f0a94082d978bbdd33c38fefea15346c6c6bffb70bc645a71dc0f1f87dd3276 -F src/json.c 442ff1ce9ebb82728aca5ddfcab21af46c3230e9f672307b91489f43cd6d6fdc +F src/json.c bbe9cb34174bfbc0e067a480e431ffc8f0d6cc2cfcad8a1001472bbd00ca620a F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa F src/loadext.c 7432c944ff197046d67a1207790a1b13eec4548c85a9457eb0896bb3641dfb36 F src/main.c 1b89f3de98d1b59fec5bac1d66d6ece21f703821b8eaa0d53d9604c35309f6f9 @@ -1325,11 +1325,11 @@ F test/json/json-generator.tcl dc0dd0f393800c98658fc4c47eaa6af29d4e17527380cd286 F test/json/json-q1-b.txt 606818a5fba6d9e418c9f4ea7d8418af026775042dad81439b72447a147a462c F test/json/json-q1.txt 65f9d1cdcc4cffa9823fb73ed936aae5658700cd001fde448f68bfb91c807307 F test/json/json-speed-check.sh b060a9a6c696c0a807d8929400fa11bd7113edc58b0d66b9795f424f8d0db326 x -F test/json101.test 643936557daeb2eeeb839e9d87a682c7d7c6f6aee66045eedd8613cb8471d8d8 +F test/json101.test 8f5d1a3350c36945c8b58f538e1c92cfd87fd50ab6f5e3d5f4cf3cdb03b9546d F test/json102.test 557a46e16df1aa9bdbc4076a71a45814ea0e7503d6621d87d42a8c04cbc2b0ef F test/json103.test 53df87f83a4e5fa0c0a56eb29ff6c94055c6eb919f33316d62161a8880112dbe F test/json104.test 1b844a70cddcfa2e4cd81a5db0657b2e61e7f00868310f24f56a9ba0114348c1 -F test/json105.test 11670a4387f4308ae0318cadcbd6a918ea7edcd19fbafde020720a073952675d +F test/json105.test e64a8d73677fbae67886642cd5076e2ef3efe89f8483b87595cf9c030216c9bd F test/json501.test ab168a12eb6eb14d479f8c1cdae3ac062fd5a4679f17f976e96f1af518408330 F test/json502.test 98c38e3c4573841028a1381dfb81d4c3f9b105d39668167da10d055e503f6d0b F test/jsonb01.test cace70765b36a36aec9a85a41ea65667d3bbf647d4400ddc3ac76f8fe7d94f90 @@ -2145,8 +2145,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 11aba347ff7c639500eec904e212eabe889b077351b946cfeac2b74b9703672a -R 82f8a4e62603e32381a0117063382c7a +P b69786e746ae2b927b64d9871fd120b7f8f06cc53739fd46a4da51aa16cf8576 +R ccd1a96290081a160a58be4f68f67cce +T *branch * jsonb-extract +T *sym-jsonb-extract * +T -sym-jsonb * U drh -Z 37d5559550d9d10448cc0117446ead8c +Z fece822412df4bcd481f5f301c9018f9 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index a64ec6f678..2cee28775a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b69786e746ae2b927b64d9871fd120b7f8f06cc53739fd46a4da51aa16cf8576 \ No newline at end of file +8c324af1eca27e86adc45622af4f3b06a67a3f968596ac58aa7434b1f6f05f3c \ No newline at end of file diff --git a/src/json.c b/src/json.c index 4baffa7fa3..c259ae1d87 100644 --- a/src/json.c +++ b/src/json.c @@ -1110,180 +1110,6 @@ static u32 jsonHexToInt4(const char *z){ return v; } -/* -** Make the return value from an SQL function be the SQL value of -** JsonNode pNode. -** -** If pNode is an atom (not an array or object) then the value returned -** is a pure SQL value - an SQLITE_INTEGER, SQLITE_REAL, SQLITE_TEXT, or -** SQLITE_NULL. However, if pNode is a JSON array or object, then the -** value returned is either RFC-8259 JSON text or a BLOB in the JSONB -** format, depending on the JSON_BLOB flag of the function user-data. -*/ -static void jsonReturnFromNode( - JsonParse *pParse, /* Complete JSON parse tree */ - JsonNode *pNode, /* Node to return */ - sqlite3_context *pCtx, /* Return value for this function */ - int omitSubtype /* Do not call sqlite3_result_subtype() */ -){ - switch( pNode->eType ){ - default: { - assert( pNode->eType==JSON_NULL ); - sqlite3_result_null(pCtx); - break; - } - case JSON_TRUE: { - sqlite3_result_int(pCtx, 1); - break; - } - case JSON_FALSE: { - sqlite3_result_int(pCtx, 0); - break; - } - case JSON_INT: { - sqlite3_int64 i = 0; - int rc; - int bNeg = 0; - const char *z; - char *zz; - sqlite3 *db = sqlite3_context_db_handle(pCtx); - - assert( pNode->eU==1 ); - zz = sqlite3DbStrNDup(db, pNode->u.zJContent, pNode->n); - if( zz==0 ){ - sqlite3_result_error_nomem(pCtx); - return; - } - z = zz; - if( z[0]=='-' ){ z++; bNeg = 1; } - else if( z[0]=='+' ){ z++; } - rc = sqlite3DecOrHexToI64(z, &i); - sqlite3DbFree(db, zz); - if( rc<=1 ){ - sqlite3_result_int64(pCtx, bNeg ? -i : i); - }else if( rc==3 && bNeg ){ - sqlite3_result_int64(pCtx, SMALLEST_INT64); - }else{ - goto to_double; - } - break; - } - case JSON_REAL: { - double r; - const char *z; - assert( pNode->eU==1 ); - to_double: - z = pNode->u.zJContent; - sqlite3AtoF(z, &r, pNode->n, SQLITE_UTF8); - sqlite3_result_double(pCtx, r); - break; - } - case JSON_STRING: { - if( pNode->jnFlags & JNODE_RAW ){ - assert( pNode->eU==1 ); - sqlite3_result_text(pCtx, pNode->u.zJContent, pNode->n, - SQLITE_TRANSIENT); - }else if( (pNode->jnFlags & JNODE_ESCAPE)==0 ){ - /* JSON formatted without any backslash-escapes */ - assert( pNode->eU==1 ); - sqlite3_result_text(pCtx, pNode->u.zJContent, pNode->n, - SQLITE_TRANSIENT); - }else{ - /* Translate JSON formatted string into raw text */ - u32 i; - u32 n = pNode->n; - const char *z; - char *zOut; - u32 j; - u32 nOut = n; - assert( pNode->eU==1 ); - z = pNode->u.zJContent; - zOut = sqlite3_malloc( nOut+1 ); - if( zOut==0 ){ - sqlite3_result_error_nomem(pCtx); - break; - } - for(i=0, j=0; i>6)); - zOut[j++] = 0x80 | (v&0x3f); - }else{ - u32 vlo; - if( (v&0xfc00)==0xd800 - && i>18); - zOut[j++] = 0x80 | ((v>>12)&0x3f); - zOut[j++] = 0x80 | ((v>>6)&0x3f); - zOut[j++] = 0x80 | (v&0x3f); - }else{ - zOut[j++] = 0xe0 | (v>>12); - zOut[j++] = 0x80 | ((v>>6)&0x3f); - zOut[j++] = 0x80 | (v&0x3f); - } - } - continue; - }else if( c=='b' ){ - c = '\b'; - }else if( c=='f' ){ - c = '\f'; - }else if( c=='n' ){ - c = '\n'; - }else if( c=='r' ){ - c = '\r'; - }else if( c=='t' ){ - c = '\t'; - }else if( c=='v' ){ - c = '\v'; - }else if( c=='\'' || c=='"' || c=='/' || c=='\\' ){ - /* pass through unchanged */ - }else if( c=='0' ){ - c = 0; - }else if( c=='x' ){ - c = (jsonHexToInt(z[i+1])<<4) | jsonHexToInt(z[i+2]); - i += 2; - }else if( c=='\r' && z[i+1]=='\n' ){ - i++; - continue; - }else if( 0xe2==(u8)c ){ - assert( 0x80==(u8)z[i+1] ); - assert( 0xa8==(u8)z[i+2] || 0xa9==(u8)z[i+2] ); - i += 2; - continue; - }else{ - continue; - } - } /* end if( c=='\\' ) */ - zOut[j++] = c; - } /* end for() */ - zOut[j] = 0; - sqlite3_result_text(pCtx, zOut, j, sqlite3_free); - } - break; - } - case JSON_ARRAY: - case JSON_OBJECT: { - jsonReturnNodeAsJson(pParse, pNode, pCtx, 0, omitSubtype); - break; - } - } -} - /* ** A macro to hint to the compiler that a function should not be ** inlined. @@ -4217,6 +4043,7 @@ static void jsonReturnFromBlob( } } +#if 0 /* Do a JSON_EXTRACT(JSON, PATH) on a when JSON is a BLOB. */ static void jsonExtractFromBlob( @@ -4277,6 +4104,7 @@ static void jsonExtractFromBlob( sqlite3_free(zMsg); } } +#endif /* ** pArg is a function argument that might be an SQL value or a JSON @@ -4819,6 +4647,19 @@ static void jsonArrayLengthFunc( sqlite3_result_int64(ctx, n); } +/* +** Generate a bad path error for json_extract() +*/ +static void jsonExtractBadPathError( + sqlite3_context *ctx, /* The function call containing the error */ + const char *zPath /* The path with the problem */ +){ + sqlite3 *db = sqlite3_context_db_handle(ctx); + char *zMsg = sqlite3MPrintf(db, "bad JSON path: %Q", zPath); + sqlite3_result_error(ctx, zMsg, -1); + sqlite3DbFree(db, zMsg); +} + /* ** json_extract(JSON, PATH, ...) ** "->"(JSON,PATH) @@ -4844,83 +4685,94 @@ static void jsonExtractFunc( int argc, sqlite3_value **argv ){ - JsonParse *p; /* The parse */ - JsonNode *pNode; - const char *zPath; - int flags = SQLITE_PTR_TO_INT(sqlite3_user_data(ctx)); - JsonString jx; + JsonParse *p = 0; /* The parse */ + int flags; /* Flags associated with the function */ + int i; /* Loop counter */ + JsonString jx; /* String for array result */ if( argc<2 ) return; - if( jsonFuncArgMightBeBinary(argv[0]) && argc==2 ){ - jsonExtractFromBlob(ctx, argv[0], argv[1], flags); - return; - } - p = jsonParseCached(ctx, argv[0], ctx, 0); + p = jsonParseFuncArg(ctx, argv[0], 0); if( p==0 ) return; - if( argc==2 ){ + flags = SQLITE_PTR_TO_INT(sqlite3_user_data(ctx)); + jsonStringInit(&jx, ctx); + if( argc>2 ){ + jsonAppendChar(&jx, '['); + } + for(i=1; i and ->> operators accept abbreviated PATH arguments. This - ** is mostly for compatibility with PostgreSQL, but also for - ** convenience. - ** - ** NUMBER ==> $[NUMBER] // PG compatible - ** LABEL ==> $.LABEL // PG compatible - ** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience - */ - jsonStringInit(&jx, ctx); - if( sqlite3Isdigit(zPath[0]) ){ - jsonAppendRawNZ(&jx, "$[", 2); - jsonAppendRaw(&jx, zPath, (int)strlen(zPath)); - jsonAppendRawNZ(&jx, "]", 2); - }else{ - jsonAppendRawNZ(&jx, "$.", 1 + (zPath[0]!='[')); - jsonAppendRaw(&jx, zPath, (int)strlen(zPath)); - jsonAppendChar(&jx, 0); - } - pNode = jx.eErr ? 0 : jsonLookup(p, jx.zBuf, 0, ctx); - jsonStringReset(&jx); - }else{ - pNode = jsonLookup(p, zPath, 0, ctx); + const char *zPath = (const char*)sqlite3_value_text(argv[i]); + u32 j; + if( zPath==0 ) goto json_extract_error; + if( zPath[0]=='$' ){ + j = jsonLookupBlobStep(p, 0, zPath+1, 0); + }else if( (flags & JSON_ABPATH) ){ + /* The -> and ->> operators accept abbreviated PATH arguments. This + ** is mostly for compatibility with PostgreSQL, but also for + ** convenience. + ** + ** NUMBER ==> $[NUMBER] // PG compatible + ** LABEL ==> $.LABEL // PG compatible + ** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience + */ + jsonStringInit(&jx, ctx); + if( sqlite3Isdigit(zPath[0]) ){ + jsonAppendRawNZ(&jx, "[", 1); + jsonAppendRaw(&jx, zPath, (int)strlen(zPath)); + jsonAppendRawNZ(&jx, "]", 2); + }else if( zPath[0]!='[' ){ + jsonAppendRawNZ(&jx, ".", 1); + jsonAppendRaw(&jx, zPath, (int)strlen(zPath)); + jsonAppendChar(&jx, 0); } - if( pNode ){ + jsonStringTerminate(&jx); + j = jsonLookupBlobStep(p, 0, jx.zBuf, 0); + jsonStringReset(&jx); + }else{ + jsonExtractBadPathError(ctx, zPath); + goto json_extract_error; + } + if( jnBlob ){ + if( argc==2 ){ if( flags & JSON_JSON ){ - jsonReturnNodeAsJson(p, pNode, ctx, 0, 0); + jsonStringInit(&jx, ctx); + jsonXlateBlobToText(p, j, &jx); + jsonReturnString(&jx); + jsonStringReset(&jx); + sqlite3_result_subtype(ctx, JSON_SUBTYPE); }else{ - jsonReturnFromNode(p, pNode, ctx, 1); + jsonReturnFromBlob(p, j, ctx, 0); + if( (flags & JSON_SQL)==0 && (p->aBlob[j]&0x0f)>=JSONB_ARRAY ){ + sqlite3_result_subtype(ctx, JSON_SUBTYPE); + } } + }else{ + jsonAppendSeparator(&jx); + jsonXlateBlobToText(p, j, &jx); } - }else{ - pNode = jsonLookup(p, zPath, 0, ctx); - if( p->nErr==0 && pNode ) jsonReturnFromNode(p, pNode, ctx, 0); - } - }else{ - /* Two or more PATH arguments results in a JSON array with each - ** element of the array being the value selected by one of the PATHs */ - int i; - jsonStringInit(&jx, ctx); - jsonAppendChar(&jx, '['); - for(i=1; inErr ) break; - jsonAppendSeparator(&jx); - if( pNode ){ - jsonXlateNodeToText(p, pNode, &jx); + }else if( j==JSON_BLOB_NOTFOUND ){ + if( argc==2 ){ + goto json_extract_error; /* Return NULL if not found */ }else{ + jsonAppendSeparator(&jx); jsonAppendRawNZ(&jx, "null", 4); } + }else if( j==JSON_BLOB_ERROR ){ + sqlite3_result_error(ctx, "malformed JSON", -1); + goto json_extract_error; + }else{ + jsonExtractBadPathError(ctx, zPath); + goto json_extract_error; } - if( i==argc ){ - jsonAppendChar(&jx, ']'); - jsonReturnString(&jx); - sqlite3_result_subtype(ctx, JSON_SUBTYPE); - } - jsonStringReset(&jx); } + if( argc>2 ){ + jsonAppendChar(&jx, ']'); + jsonReturnString(&jx); + sqlite3_result_subtype(ctx, JSON_SUBTYPE); + } +json_extract_error: + jsonStringReset(&jx); + jsonParseFree(p); + return; } /* This is the RFC 7396 MergePatch algorithm. diff --git a/test/json101.test b/test/json101.test index ebaf0f3b34..3c3b1a5ec0 100644 --- a/test/json101.test +++ b/test/json101.test @@ -974,7 +974,7 @@ do_execsql_test json101-18.4 { } {6} do_catchsql_test json101-18.5 { SELECT json_extract('{"":8}', '$.'); -} {1 {JSON path error near ''}} +} {1 {bad JSON path: '$.'}} # 2022-08-29 https://sqlite.org/forum/forumpost/9b9e4716c0d7bbd1 # This is not a problem specifically with JSON functions. It is diff --git a/test/json105.test b/test/json105.test index 83face188e..5592191c22 100644 --- a/test/json105.test +++ b/test/json105.test @@ -96,18 +96,18 @@ json_replace_test 80 {'$.b[#-1]','AAA','$.b[#-1]','BBB'} \ do_catchsql_test json105-6.10 { SELECT json_extract(j, '$.b[#-]') FROM t1; -} {1 {JSON path error near '[#-]'}} +} {1 {bad JSON path: '$.b[#-]'}} do_catchsql_test json105-6.20 { SELECT json_extract(j, '$.b[#9]') FROM t1; -} {1 {JSON path error near '[#9]'}} +} {1 {bad JSON path: '$.b[#9]'}} do_catchsql_test json105-6.30 { SELECT json_extract(j, '$.b[#+2]') FROM t1; -} {1 {JSON path error near '[#+2]'}} +} {1 {bad JSON path: '$.b[#+2]'}} do_catchsql_test json105-6.40 { SELECT json_extract(j, '$.b[#-1') FROM t1; -} {1 {JSON path error near '[#-1'}} +} {1 {bad JSON path: '$.b[#-1'}} do_catchsql_test json105-6.50 { SELECT json_extract(j, '$.b[#-1x]') FROM t1; -} {1 {JSON path error near '[#-1x]'}} +} {1 {bad JSON path: '$.b[#-1x]'}} finish_test