From 1bce6b468edd0e0763a24a9f126aa9c5461a0689 Mon Sep 17 00:00:00 2001 From: stephan Date: Sun, 6 Aug 2023 10:14:53 +0000 Subject: [PATCH] Bind sqlite3_db_filename() and (closely related) (A) add many more docs about the UTF-8/MUTF-8 discrepancy (B) start adding internals to enable us to perform the standard-UTF-8-to-Java conversion from C. FossilOrigin-Name: 586720fa714ac74491cd85d0c6645242e55e5989ad312ef6e15e0b0acc6906ff --- ext/jni/src/c/sqlite3-jni.c | 76 ++++++++++++++++++-- ext/jni/src/c/sqlite3-jni.h | 8 +++ ext/jni/src/org/sqlite/jni/Authorizer.java | 8 +-- ext/jni/src/org/sqlite/jni/SQLite3Jni.java | 81 +++++++++++++++++++++- ext/jni/src/org/sqlite/jni/Tester1.java | 1 + manifest | 20 +++--- manifest.uuid | 2 +- 7 files changed, 171 insertions(+), 25 deletions(-) diff --git a/ext/jni/src/c/sqlite3-jni.c b/ext/jni/src/c/sqlite3-jni.c index aba4db5a91..646ecab06c 100644 --- a/ext/jni/src/c/sqlite3-jni.c +++ b/ext/jni/src/c/sqlite3-jni.c @@ -187,7 +187,10 @@ #define PtrGet_sqlite3_value(OBJ) getNativePointer(env,OBJ,S3ClassNames.sqlite3_value) #define PtrGet_sqlite3_context(OBJ) getNativePointer(env,OBJ,S3ClassNames.sqlite3_context) /* Helpers for Java value reference management. */ -#define REF_G(VAR) (*env)->NewGlobalRef(env, VAR) +static inline jobject new_global_ref(JNIEnv *env, jobject v){ + return v ? (*env)->NewGlobalRef(env, v) : NULL; +} +#define REF_G(VAR) new_global_ref(env, (VAR)) #define REF_L(VAR) (*env)->NewLocalRef(env, VAR) #define UNREF_G(VAR) if(VAR) (*env)->DeleteGlobalRef(env, (VAR)) #define UNREF_L(VAR) if(VAR) (*env)->DeleteLocalRef(env, (VAR)) @@ -337,9 +340,15 @@ struct NphCacheLine { typedef struct JNIEnvCacheLine JNIEnvCacheLine; struct JNIEnvCacheLine { JNIEnv *env /* env in which this cache entry was created */; + //! The various refs to global classes might be cacheable a single + // time globally. Information online seems inconsistent on that + // point. jclass globalClassObj /* global ref to java.lang.Object */; jclass globalClassLong /* global ref to java.lang.Long */; + jclass globalClassString /* global ref to java.lang.String */; + jobject globalClassCharsetUtf8 /* global ref to StandardCharset.UTF_8 */; jmethodID ctorLong1 /* the Long(long) constructor */; + jmethodID ctorStringBA /* the String(byte[],Charset) constructor */; jobject currentStmt /* Current Java sqlite3_stmt object being prepared, stepped, reset, or finalized. Needed for tracing, the @@ -563,11 +572,32 @@ static JNIEnvCacheLine * S3Global_JNIEnvCache_cache(JNIEnv * const env){ row->env = env; row->globalClassObj = REF_G((*env)->FindClass(env,"java/lang/Object")); EXCEPTION_IS_FATAL("Error getting reference to Object class."); + row->globalClassLong = REF_G((*env)->FindClass(env,"java/lang/Long")); EXCEPTION_IS_FATAL("Error getting reference to Long class."); row->ctorLong1 = (*env)->GetMethodID(env, row->globalClassLong, "", "(J)V"); EXCEPTION_IS_FATAL("Error getting reference to Long constructor."); + + row->globalClassString = REF_G((*env)->FindClass(env,"java/lang/String")); + EXCEPTION_IS_FATAL("Error getting reference to String class."); + row->ctorStringBA = + (*env)->GetMethodID(env, row->globalClassString, + "", "([BLjava/nio/charset/Charset;)V"); + EXCEPTION_IS_FATAL("Error getting reference to String(byte[],Charset) ctor."); + + { /* StandardCharsets.UTF_8 */ + jfieldID fUtf8; + jclass const klazzSC = + (*env)->FindClass(env,"java/nio/charset/StandardCharsets"); + EXCEPTION_IS_FATAL("Error getting reference to StndardCharsets class."); + fUtf8 = (*env)->GetStaticFieldID(env, klazzSC, "UTF_8", + "Ljava/nio/charset/Charset;"); + EXCEPTION_IS_FATAL("Error getting StndardCharsets.UTF_8 field."); + row->globalClassCharsetUtf8 = + REF_G((*env)->GetStaticObjectField(env, klazzSC, fUtf8)); + EXCEPTION_IS_FATAL("Error getting reference to StandardCharsets.UTF_8."); + } return row; } @@ -639,6 +669,9 @@ static void JNIEnvCacheLine_clear(JNIEnvCacheLine * const p){ int i; UNREF_G(p->globalClassObj); UNREF_G(p->globalClassLong); + UNREF_G(p->globalClassString); + UNREF_G(p->globalClassCharsetUtf8); + UNREF_G(p->currentStmt); #ifdef SQLITE_ENABLE_FTS5 UNREF_G(p->jFtsExt); UNREF_G(p->jPhraseIter.klazz); @@ -1993,12 +2026,43 @@ JDECL(jint,1create_1function)(JENV_JSELF, jobject jDb, jstring jFuncName, return create_function(env, jDb, jFuncName, nArg, eTextRep, jFunctor); } -/* -JDECL(jint,1create_1window_1function)(JENV_JSELF, jstring jFuncName, jint nArg, - jint eTextRep, jobject jFunctor){ - return create_function_mega(env, jFuncName, nArg, eTextRep, jFunctor); + +JDECL(jbyteArray,1db_1filename)(JENV_JSELF, jobject jDb, jbyteArray jDbName){ +#if 1 + PerDbStateJni * const ps = PerDbStateJni_for_db(env, jDb, 0, 0); + jbyte *zFilename = (ps && jDbName) ? JBA_TOC(jDbName) : 0; + const char *zRv; + jbyteArray jRv = 0; + + if( !ps || (jDbName && !zFilename) ) return 0; + zRv = sqlite3_db_filename(ps->pDb, (const char *)zFilename); + if( zRv ){ + const int n = sqlite3Strlen30(zRv); + jRv = (*env)->NewByteArray(env, (jint)n); + if( jRv ){ + (*env)->SetByteArrayRegion(env, jRv, 0, (jint)n, (const jbyte *)zRv); + } + } + JBA_RELEASE(jDbName, zFilename); + return jRv; +#else + /* For comparison, this impl expects a jstring jDbName and returns a + jstring for significant code savings but it's not + MUTF-8-safe. With this impl, the Java-side byte-array-using + sqlite3_db_filename() impl is unnecessary. */ + JDECL(jstring,1db_1filename)(JENV_JSELF, jobject jDb, jstring jDbName){ + PerDbStateJni * const ps = PerDbStateJni_for_db(env, jDb, 0, 0); + const char *zFilename = (ps && jDbName) ? JSTR_TOC(jDbName) : 0; + const char *zRv; + + if( !ps || (jDbName && !zFilename)) return 0; + zRv = sqlite3_db_filename(ps->pDb, zFilename ? zFilename : "main"); + JSTR_RELEASE(jDbName, zFilename); + return zRv ? (*env)->NewStringUTF(env, zRv) : 0; +} +#endif + } -*/ JDECL(jstring,1errmsg)(JENV_JSELF, jobject jpDb){ return (*env)->NewStringUTF(env, sqlite3_errmsg(PtrGet_sqlite3(jpDb))); diff --git a/ext/jni/src/c/sqlite3-jni.h b/ext/jni/src/c/sqlite3-jni.h index 45a046c99d..ba24cb391a 100644 --- a/ext/jni/src/c/sqlite3-jni.h +++ b/ext/jni/src/c/sqlite3-jni.h @@ -1091,6 +1091,14 @@ JNIEXPORT jint JNICALL Java_org_sqlite_jni_SQLite3Jni_sqlite3_1create_1function JNIEXPORT jint JNICALL Java_org_sqlite_jni_SQLite3Jni_sqlite3_1data_1count (JNIEnv *, jclass, jobject); +/* + * Class: org_sqlite_jni_SQLite3Jni + * Method: sqlite3_db_filename + * Signature: (Lorg/sqlite/jni/sqlite3;[B)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_sqlite_jni_SQLite3Jni_sqlite3_1db_1filename + (JNIEnv *, jclass, jobject, jbyteArray); + /* * Class: org_sqlite_jni_SQLite3Jni * Method: sqlite3_errcode diff --git a/ext/jni/src/org/sqlite/jni/Authorizer.java b/ext/jni/src/org/sqlite/jni/Authorizer.java index b10567716c..3fd6861fec 100644 --- a/ext/jni/src/org/sqlite/jni/Authorizer.java +++ b/ext/jni/src/org/sqlite/jni/Authorizer.java @@ -22,12 +22,8 @@ public interface Authorizer { callback, with one caveat: the string values passed here were initially (at the C level) encoded in standard UTF-8. If they contained any constructs which are not compatible with MUTF-8, - these strings will not have the expected values. The strings - passed through the authorizer would only be adversely affected by - that if the database tables and columns use "highly exotic" - names. Any names which contain no NUL bytes, nor characters - outside of the Basic Multilingual Plane are unaffected by this - discrepancy. + these strings will not have the expected values. For further + details, see the documentation for the SQLite3Jni class. Must not throw. */ diff --git a/ext/jni/src/org/sqlite/jni/SQLite3Jni.java b/ext/jni/src/org/sqlite/jni/SQLite3Jni.java index 1f7180f14a..ebf60c10f6 100644 --- a/ext/jni/src/org/sqlite/jni/SQLite3Jni.java +++ b/ext/jni/src/org/sqlite/jni/SQLite3Jni.java @@ -60,7 +60,67 @@ import java.lang.annotation.ElementType; https://sqlite.org/c3ref/intro.html - A small handful of Java-specific APIs have been added. + A handful of Java-specific APIs have been added. + + + ****************************************************************** + *** Warning regarding Java's Modified UTF-8 vs standard UTF-8: *** + ****************************************************************** + + SQLite internally uses UTF-8 encoding, whereas Java natively uses + UTF-16. Java JNI has routines for converting to and from UTF-8, + _but_ JNI uses what its docs call modified UTF-8 (see links below) + Care must be taken when converting Java strings to or from standard + UTF-8 to ensure that the proper conversion is performed. In short, + Java's `String.getBytes(StandardCharsets.UTF_8)` performs the proper + conversion in Java, and there are no JNI C APIs for that conversion + (JNI's `NewStringUTF()` requires its input to be in MUTF-8). + + The known consequences and limitations this discrepancy places on + the SQLite3 JNI binding include: + + - Any functions which return client-side data from a database + take extra care to perform proper conversion, at the cost of + efficiency. + + - Functions which return database identifiers require those + identifiers to have identical representations in UTF-8 and + MUTF-8. They do not perform such conversions (A) because of the + much lower risk of an encoding discrepancy and (B) to avoid + significant extra code involved (see both the Java- and C-side + implementations of sqlite3_db_filename() for an example). Names + of databases, tables, columns, collations, and functions MUST NOT + contain characters which differ in MUTF-8 and UTF-8, or certain + APIs will mis-translate them on their way between languages + (possibly leading to a crash). + + - sqlite3_trace_v2() is also currently affected by this, in that + it requires that traced SQL statements be compatible with + MUTF-8. The alternative would be to perform two extra layers of + conversion for that performance-sensitive function: one from + UTF-8 to a byte-array before passing the data from C to Java, + and then from byte-array to String in the tracer implementation. + + - C functions which take C-style strings without a length argument + require special care when taking input from Java. In particular, + Java strings converted to byte arrays for encoding purposes are + not NUL-terminated, and conversion to a Java byte array must be + careful to add one. Functions which take a length do not require + this. Search the SQLite3Jni class for "\0" for many examples. + + - Similarly, C-side code which deals with strings which might not be + NUL-terminated (e.g. while tokenizing in FTS5-related code) cannot + use JNI's new-string functions to return them to Java because none + of those APIs take a string-length argument. Such cases must + return byte arrays instead of strings. + + Further reading: + + - https://stackoverflow.com/questions/57419723 + - https://stackoverflow.com/questions/7921016 + - https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#unicode + - https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8 + */ public final class SQLite3Jni { static { @@ -84,7 +144,7 @@ public final class SQLite3Jni { undefined if any database objects are (A) still active at the time it is called _and_ (B) calls are subsequently made into the library with such a database. Doing so will, at best, lead to a - crash. It worst, it will lead to the db possibly misbehaving + crash. Azt worst, it will lead to the db possibly misbehaving because some of its Java-bound state has been cleared. There is no immediate harm in (A) so long as condition (B) is not met. This process does _not_ actually close any databases or finalize @@ -344,6 +404,23 @@ public final class SQLite3Jni { public static native int sqlite3_data_count(@NotNull sqlite3_stmt stmt); + /** In order to support the full range of UTF-8 filenames, we + require an extra layer of conversion via a byte[]. */ + private static native byte[] sqlite3_db_filename(@NotNull sqlite3 db, + @NotNull byte dbName[]); + + /** + As for the C API of the same name except that if dbName is null then + "main" is assumed. + */ + public static String sqlite3_db_filename(@NotNull sqlite3 db, + @Nullable String dbName){ + final byte[] bName = + (((null == dbName) ? "main" : dbName)+"\0").getBytes(StandardCharsets.UTF_8); + final byte[] rv = sqlite3_db_filename(db, bName); + return (null == rv) ? null : new String(rv, StandardCharsets.UTF_8); + } + public static native int sqlite3_errcode(@NotNull sqlite3 db); public static native int sqlite3_extended_errcode(@NotNull sqlite3 db); diff --git a/ext/jni/src/org/sqlite/jni/Tester1.java b/ext/jni/src/org/sqlite/jni/Tester1.java index a5020663a6..334484166c 100644 --- a/ext/jni/src/org/sqlite/jni/Tester1.java +++ b/ext/jni/src/org/sqlite/jni/Tester1.java @@ -734,6 +734,7 @@ public class Tester1 { rc = sqlite3_open(dbName, db2); ++metrics.dbOpen; affirm( 0 == rc ); + affirm( sqlite3_db_filename(db1, null).endsWith(dbName) ); final ValueHolder xDestroyed = new ValueHolder<>(false); final ValueHolder xBusyCalled = new ValueHolder<>(0); diff --git a/manifest b/manifest index 84a488eff3..bc94970f53 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Bind\ssqlite3_set_authorizer()\sto\sJNI. -D 2023-08-05T22:41:36.472 +C Bind\ssqlite3_db_filename()\sand\s(closely\srelated)\s(A)\sadd\smany\smore\sdocs\sabout\sthe\sUTF-8/MUTF-8\sdiscrepancy\s(B)\sstart\sadding\sinternals\sto\senable\sus\sto\sperform\sthe\sstandard-UTF-8-to-Java\sconversion\sfrom\sC. +D 2023-08-06T10:14:53.465 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -232,9 +232,9 @@ F ext/icu/icu.c c074519b46baa484bb5396c7e01e051034da8884bad1a1cb7f09bbe6be3f0282 F ext/icu/sqliteicu.h fa373836ed5a1ee7478bdf8a1650689294e41d0c89c1daab26e9ae78a32075a8 F ext/jni/GNUmakefile bb4cd99bd8da534215cb6d278f05a626283eb5d2e8aebdb4d35e548637d35a9a F ext/jni/README.md 6ff7e1f4100dee980434a6ee37a199b653bceec62e233a6e2ccde6e7ae0c58bf -F ext/jni/src/c/sqlite3-jni.c b81dcf92a1fbd7ada98dbc0f526b02e9e97e2be84cb331092e933c7d8feeafe2 -F ext/jni/src/c/sqlite3-jni.h 1db6075134d7efc71f634e4a212e41d0178346e68c7c615f220d753e4bd23382 -F ext/jni/src/org/sqlite/jni/Authorizer.java 189e7fa2155466ded5a52eed1ccb46581b5b45d70fcef49f538c0b93ea88e337 +F ext/jni/src/c/sqlite3-jni.c 88c18f2f1dd8064ca3d264bcda0df950b57bc0f5b9d8bfeb43bdd3f5be723ab8 +F ext/jni/src/c/sqlite3-jni.h 03c61c4f84c028169633392d7eb06caa6000e8bf3c0a3f7ac44e645dedbbfb9a +F ext/jni/src/org/sqlite/jni/Authorizer.java 8dde03bbe50896d2f426240a4af4dcb6d98b655af84fe6ed86e637f5d5ac1fc8 F ext/jni/src/org/sqlite/jni/BusyHandler.java 1b1d3e5c86cd796a0580c81b6af6550ad943baa25e47ada0dcca3aff3ebe978c F ext/jni/src/org/sqlite/jni/Collation.java 8dffbb00938007ad0967b2ab424d3c908413af1bbd3d212b9c9899910f1218d1 F ext/jni/src/org/sqlite/jni/CollationNeeded.java ebc7cd96d46a70daa76016a308e80f70a3f21d3282787c8d139aa840fdcb1bd7 @@ -250,8 +250,8 @@ F ext/jni/src/org/sqlite/jni/OutputPointer.java 013f2b5fe569d0585a695f5cfa605a3b F ext/jni/src/org/sqlite/jni/ProgressHandler.java 5979450e996416d28543f1d42634d308439565a99332a8bd84e424af667116cc F ext/jni/src/org/sqlite/jni/RollbackHook.java b04c8abcc6ade44a8a57129e33765793f69df0ba909e49ba18d73f4268d92564 F ext/jni/src/org/sqlite/jni/SQLFunction.java 09ce81c1c637e31c3a830d4c859cce95d65f5e02ff45f8bd1985b3479381bc46 -F ext/jni/src/org/sqlite/jni/SQLite3Jni.java 850012ad011d5d60a8aaaf9b8db562c60277988982793135814892e82ac77b97 -F ext/jni/src/org/sqlite/jni/Tester1.java 214985f8a700ac56a174809c04196215fef5d7bb8b86e095d9d0abf436349ffb +F ext/jni/src/org/sqlite/jni/SQLite3Jni.java 7268b37a32657798d01e116d639585cb5ed9f83b0760eb0416882bef79ffcb78 +F ext/jni/src/org/sqlite/jni/Tester1.java f6fcd218eb9d459866578d80b4223c15a2336af7fd52454c0be57bc855b1a892 F ext/jni/src/org/sqlite/jni/TesterFts5.java cf2d687baafffdeba219b77cf611fd47a0556248820ea794ae3e8259bfbdc5ee F ext/jni/src/org/sqlite/jni/Tracer.java a5cece9f947b0af27669b8baec300b6dd7ff859c3e6a6e4a1bd8b50f9714775d F ext/jni/src/org/sqlite/jni/UpdateHook.java e58645a1727f8a9bbe72dc072ec5b40d9f9362cb0aa24acfe93f49ff56a9016d @@ -2082,8 +2082,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 9dd8b78419e19e88bc3fbff9bf200390b146b2461af2bb6b93d8467036619e33 -R bf3d1c30171f035a2002cb68468caf2a +P e0fa03135942cd2fe732a74510d380ba78ab230c452168e638f32b4aee04b3f7 +R 67866ade0f482ffd79944a87403dd96d U stephan -Z a910bd2897a92249409319f7eba80e4a +Z b7b857a7fcd9ef420d175786c42a22e5 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 1516c35375..6a5c31e951 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e0fa03135942cd2fe732a74510d380ba78ab230c452168e638f32b4aee04b3f7 \ No newline at end of file +586720fa714ac74491cd85d0c6645242e55e5989ad312ef6e15e0b0acc6906ff \ No newline at end of file -- 2.39.5