From: drh Date: Thu, 5 Apr 2001 15:57:13 +0000 (+0000) Subject: i18n changes (CVS 201) X-Git-Tag: version-3.6.10~5854 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=297ecf146b0650d24d751628f5c71398760a9e0d;p=thirdparty%2Fsqlite.git i18n changes (CVS 201) FossilOrigin-Name: 8390f6521af0f1c5cd0298cc4a1dfa3f092c1e15 --- diff --git a/Makefile.in b/Makefile.in index 113efaa354..f3e04a9b9d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -139,7 +139,9 @@ parse.c: $(TOP)/src/parse.y lemon ./lemon parse.y sqlite.h: $(TOP)/src/sqlite.h.in - sed -e s/--VERS--/`cat ${TOP}/VERSION`/ $(TOP)/src/sqlite.h.in >sqlite.h + sed -e s/--VERS--/`cat ${TOP}/VERSION`/ \ + -e s/--ENCODING--/@ENCODING@/ \ + $(TOP)/src/sqlite.h.in >sqlite.h tokenize.o: $(TOP)/src/tokenize.c $(HDR) $(TCC) $(GDBM_FLAGS) -c $(TOP)/src/tokenize.c diff --git a/configure b/configure index 03f26984e9..23639edbc5 100755 --- a/configure +++ b/configure @@ -13,6 +13,8 @@ ac_default_prefix=/usr/local # Any additions from configure.in: ac_help="$ac_help --with-hints=FILE Read configuration options from FILE" +ac_help="$ac_help + --enable-utf8 Use UTF-8 encodings" # Initialize some variables set by options. # The variables have the same names as the options, with @@ -525,7 +527,7 @@ fi # The following RCS revision string applies to configure.in -# $Revision: 1.8 $ +# $Revision: 1.9 $ ######### # Make sure we are not building in a subdirectory of the source tree. @@ -600,7 +602,7 @@ if test "$config_BUILD_CC" = ""; then # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:604: checking for $ac_word" >&5 +echo "configure:606: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -630,7 +632,7 @@ if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. 
set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:634: checking for $ac_word" >&5 +echo "configure:636: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -681,7 +683,7 @@ fi # Extract the first word of "cl", so it can be a program name with args. set dummy cl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:685: checking for $ac_word" >&5 +echo "configure:687: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -713,7 +715,7 @@ fi fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:717: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 +echo "configure:719: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. @@ -724,12 +726,12 @@ cross_compiling=$ac_cv_prog_cc_cross cat > conftest.$ac_ext << EOF -#line 728 "configure" +#line 730 "configure" #include "confdefs.h" main(){return(0);} EOF -if { (eval echo configure:733: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:735: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. if (./conftest; exit) 2>/dev/null; then @@ -755,12 +757,12 @@ if test $ac_cv_prog_cc_works = no; then { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... 
$ac_c" 1>&6 -echo "configure:759: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 +echo "configure:761: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:764: checking whether we are using GNU C" >&5 +echo "configure:766: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -769,7 +771,7 @@ else yes; #endif EOF -if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:773: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then +if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:775: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else ac_cv_prog_gcc=no @@ -788,7 +790,7 @@ ac_test_CFLAGS="${CFLAGS+set}" ac_save_CFLAGS="$CFLAGS" CFLAGS= echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6 -echo "configure:792: checking whether ${CC-cc} accepts -g" >&5 +echo "configure:794: checking whether ${CC-cc} accepts -g" >&5 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -827,12 +829,12 @@ fi else BUILD_CC=$config_BUILD_CC echo $ac_n "checking host compiler""... $ac_c" 1>&6 -echo "configure:831: checking host compiler" >&5 +echo "configure:833: checking host compiler" >&5 CC=$BUILD_CC echo "$ac_t""$BUILD_CC" 1>&6 fi echo $ac_n "checking switches for the host compiler""... $ac_c" 1>&6 -echo "configure:836: checking switches for the host compiler" >&5 +echo "configure:838: checking switches for the host compiler" >&5 if test "$config_BUILD_CFLAGS" != ""; then CFLAGS=$config_BUILD_CFLAGS BUILD_CFLAGS=$config_BUILD_CFLAGS @@ -852,7 +854,7 @@ fi # the target machine. # echo $ac_n "checking target compiler""... 
$ac_c" 1>&6 -echo "configure:856: checking target compiler" >&5 +echo "configure:858: checking target compiler" >&5 if test "$config_TARGET_CC" != ""; then TARGET_CC=$config_TARGET_CC else @@ -860,7 +862,7 @@ else fi echo "$ac_t""$TARGET_CC" 1>&6 echo $ac_n "checking switches on the target compiler""... $ac_c" 1>&6 -echo "configure:864: checking switches on the target compiler" >&5 +echo "configure:866: checking switches on the target compiler" >&5 if test "$config_TARGET_CFLAGS" != ""; then TARGET_CFLAGS=$config_TARGET_CFLAGS else @@ -868,7 +870,7 @@ else fi echo "$ac_t""$TARGET_CFLAGS" 1>&6 echo $ac_n "checking target linker""... $ac_c" 1>&6 -echo "configure:872: checking target linker" >&5 +echo "configure:874: checking target linker" >&5 if test "$config_TARGET_LINK" = ""; then TARGET_LINK=$TARGET_CC else @@ -876,7 +878,7 @@ else fi echo "$ac_t""$TARGET_LINK" 1>&6 echo $ac_n "checking switches on the target compiler""... $ac_c" 1>&6 -echo "configure:880: checking switches on the target compiler" >&5 +echo "configure:882: checking switches on the target compiler" >&5 if test "$config_TARGET_TFLAGS" != ""; then TARGET_TFLAGS=$config_TARGET_TFLAGS else @@ -888,7 +890,7 @@ else # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:892: checking for $ac_word" >&5 +echo "configure:894: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -934,7 +936,7 @@ echo "$ac_t""$TARGET_TFLAGS" 1>&6 # it 0 if we are not. # echo $ac_n "checking if host and target compilers are the same""... 
$ac_c" 1>&6 -echo "configure:938: checking if host and target compilers are the same" >&5 +echo "configure:940: checking if host and target compilers are the same" >&5 if test "$BUILD_CC" = "$TARGET_CC"; then cross=0 echo "$ac_t""yes" 1>&6 @@ -943,13 +945,35 @@ else echo "$ac_t""no" 1>&6 fi +########## +# Are we using UTF-8 or iso8859 encodings? +# +# Check whether --enable-utf8 or --disable-utf8 was given. +if test "${enable_utf8+set}" = set; then + enableval="$enable_utf8" + : +else + enable_utf8=no +fi + +echo $ac_n "checking character encoding""... $ac_c" 1>&6 +echo "configure:961: checking character encoding" >&5 +if test "$enable_utf8" = "no"; then + ENCODING=ISO8859 + echo "$ac_t""iso8859" 1>&6 +else + ENCODING=UTF8 + echo "$ac_t""UTF-8" 1>&6 +fi + + ########### # Lots of things are different if we are compiling for Windows using # the CYGWIN environment. So check for that special case and handle # things accordingly. # echo $ac_n "checking if executables have the .exe suffix""... $ac_c" 1>&6 -echo "configure:953: checking if executables have the .exe suffix" >&5 +echo "configure:977: checking if executables have the .exe suffix" >&5 if test "$config_BUILD_EXEEXT" = ".exe"; then CYGWIN=yes echo "$ac_t""yes" 1>&6 @@ -958,12 +982,12 @@ else fi if test "$CYGWIN" != "yes"; then echo $ac_n "checking for Cygwin environment""... $ac_c" 1>&6 -echo "configure:962: checking for Cygwin environment" >&5 +echo "configure:986: checking for Cygwin environment" >&5 if eval "test \"`echo '$''{'ac_cv_cygwin'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1002: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_cygwin=yes else @@ -1038,12 +1062,12 @@ else fi CC=$TARGET_CC echo $ac_n "checking for sin""... 
$ac_c" 1>&6 -echo "configure:1042: checking for sin" >&5 +echo "configure:1066: checking for sin" >&5 if eval "test \"`echo '$''{'ac_cv_func_sin'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1094: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_sin=yes" else @@ -1087,7 +1111,7 @@ LIBS="-lm" fi echo $ac_n "checking for dlopen in -ldl""... $ac_c" 1>&6 -echo "configure:1091: checking for dlopen in -ldl" >&5 +echo "configure:1115: checking for dlopen in -ldl" >&5 ac_lib_var=`echo dl'_'dlopen | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1095,7 +1119,7 @@ else ac_save_LIBS="$LIBS" LIBS="-ldl $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1134: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -1140,14 +1164,14 @@ fi LIBS="" echo $ac_n "checking for library containing Tcl_Init""... 
$ac_c" 1>&6 -echo "configure:1144: checking for library containing Tcl_Init" >&5 +echo "configure:1168: checking for library containing Tcl_Init" >&5 if eval "test \"`echo '$''{'ac_cv_search_Tcl_Init'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else ac_func_search_save_LIBS="$LIBS" ac_cv_search_Tcl_Init="no" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1186: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_search_Tcl_Init="none required" else @@ -1169,7 +1193,7 @@ rm -f conftest* test "$ac_cv_search_Tcl_Init" = "no" && for i in tcl8.4 tcl8.3 tcl8.2 tcl8.1 tcl8.0 tcl80 tcl; do LIBS="-l$i $otherlibs $ac_func_search_save_LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1208: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_search_Tcl_Init="-l$i" break @@ -1209,7 +1233,7 @@ fi # Figure out where to get the TCL header files. # echo $ac_n "checking TCL header files""... $ac_c" 1>&6 -echo "configure:1213: checking TCL header files" >&5 +echo "configure:1237: checking TCL header files" >&5 found=no if test "$config_TARGET_TCL_INC" != ""; then TARGET_TCL_INC=$config_TARGET_TCL_INC @@ -1228,7 +1252,7 @@ if test "$found" = "yes"; then else echo "$ac_t""not specified: still searching..." 1>&6 echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:1232: checking how to run the C preprocessor" >&5 +echo "configure:1256: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= @@ -1243,13 +1267,13 @@ else # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. 
cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1253: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1277: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1260,13 +1284,13 @@ else rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1270: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1294: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1277,13 +1301,13 @@ else rm -rf conftest* CPP="${CC-cc} -nologo -E" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1287: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1311: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1309,17 +1333,17 @@ echo "$ac_t""$CPP" 1>&6 ac_safe=`echo "tcl.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for tcl.h""... 
$ac_c" 1>&6 -echo "configure:1313: checking for tcl.h" >&5 +echo "configure:1337: checking for tcl.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1323: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1347: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1346,7 +1370,7 @@ if test "$found" = "no"; then ac_safe=`echo "$dir/include/tcl.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $dir/include/tcl.h""... $ac_c" 1>&6 -echo "configure:1350: checking for $dir/include/tcl.h" >&5 +echo "configure:1374: checking for $dir/include/tcl.h" >&5 if eval "test \"`echo '$''{'ac_cv_file_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1390,14 +1414,14 @@ else LIBS="" echo $ac_n "checking for library containing gdbm_open""... 
$ac_c" 1>&6 -echo "configure:1394: checking for library containing gdbm_open" >&5 +echo "configure:1418: checking for library containing gdbm_open" >&5 if eval "test \"`echo '$''{'ac_cv_search_gdbm_open'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else ac_func_search_save_LIBS="$LIBS" ac_cv_search_gdbm_open="no" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1436: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_search_gdbm_open="none required" else @@ -1419,7 +1443,7 @@ rm -f conftest* test "$ac_cv_search_gdbm_open" = "no" && for i in gdbm; do LIBS="-l$i $ac_func_search_save_LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1458: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_search_gdbm_open="-l$i" break @@ -1458,7 +1482,7 @@ fi # Figure out where to get the GDBM header files. # echo $ac_n "checking GDBM header files""... $ac_c" 1>&6 -echo "configure:1462: checking GDBM header files" >&5 +echo "configure:1486: checking GDBM header files" >&5 found=no if test "$config_TARGET_GDBM_INC" != ""; then TARGET_GDBM_INC=$config_TARGET_GDBM_INC @@ -1470,17 +1494,17 @@ else echo "$ac_t""not specified: still searching..." 1>&6 ac_safe=`echo "gdbm.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for gdbm.h""... 
$ac_c" 1>&6 -echo "configure:1474: checking for gdbm.h" >&5 +echo "configure:1498: checking for gdbm.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1484: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1508: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1507,7 +1531,7 @@ if test "$found" = "no"; then ac_safe=`echo "$dir/include/gdbm.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $dir/include/gdbm.h""... $ac_c" 1>&6 -echo "configure:1511: checking for $dir/include/gdbm.h" >&5 +echo "configure:1535: checking for $dir/include/gdbm.h" >&5 if eval "test \"`echo '$''{'ac_cv_file_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1548,14 +1572,14 @@ else LIBS="" echo $ac_n "checking for library containing readline""... 
$ac_c" 1>&6 -echo "configure:1552: checking for library containing readline" >&5 +echo "configure:1576: checking for library containing readline" >&5 if eval "test \"`echo '$''{'ac_cv_search_readline'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else ac_func_search_save_LIBS="$LIBS" ac_cv_search_readline="no" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1594: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_search_readline="none required" else @@ -1577,7 +1601,7 @@ rm -f conftest* test "$ac_cv_search_readline" = "no" && for i in readline; do LIBS="-l$i $ac_func_search_save_LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1616: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_search_readline="-l$i" break @@ -1616,7 +1640,7 @@ fi # Figure out where to get the READLINE header files. # echo $ac_n "checking readline header files""... $ac_c" 1>&6 -echo "configure:1620: checking readline header files" >&5 +echo "configure:1644: checking readline header files" >&5 found=no if test "$config_TARGET_READLINE_INC" != ""; then TARGET_READLINE_INC=$config_TARGET_READLINE_INC @@ -1628,17 +1652,17 @@ else echo "$ac_t""not specified: still searching..." 1>&6 ac_safe=`echo "readline.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for readline.h""... 
$ac_c" 1>&6 -echo "configure:1632: checking for readline.h" >&5 +echo "configure:1656: checking for readline.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1642: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1666: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* @@ -1665,7 +1689,7 @@ if test "$found" = "no"; then ac_safe=`echo "$dir/include/readline.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $dir/include/readline.h""... $ac_c" 1>&6 -echo "configure:1669: checking for $dir/include/readline.h" >&5 +echo "configure:1693: checking for $dir/include/readline.h" >&5 if eval "test \"`echo '$''{'ac_cv_file_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1694,7 +1718,7 @@ fi ac_safe=`echo "$dir/include/readline/readline.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $dir/include/readline/readline.h""... $ac_c" 1>&6 -echo "configure:1698: checking for $dir/include/readline/readline.h" >&5 +echo "configure:1722: checking for $dir/include/readline/readline.h" >&5 if eval "test \"`echo '$''{'ac_cv_file_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1738,12 +1762,12 @@ fi # Figure out whether or not we have a "usleep()" function. # echo $ac_n "checking for usleep""... 
$ac_c" 1>&6 -echo "configure:1742: checking for usleep" >&5 +echo "configure:1766: checking for usleep" >&5 if eval "test \"`echo '$''{'ac_cv_func_usleep'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:1794: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_usleep=yes" else @@ -1945,6 +1969,7 @@ s%@TARGET_LINK@%$TARGET_LINK%g s%@TARGET_LFLAGS@%$TARGET_LFLAGS%g s%@TARGET_RANLIB@%$TARGET_RANLIB%g s%@TARGET_AR@%$TARGET_AR%g +s%@ENCODING@%$ENCODING%g s%@BUILD_EXEEXT@%$BUILD_EXEEXT%g s%@OS_UNIX@%$OS_UNIX%g s%@OS_WIN@%$OS_WIN%g diff --git a/configure.in b/configure.in index 22a374d4d8..82e4efa987 100644 --- a/configure.in +++ b/configure.in @@ -151,7 +151,7 @@ AC_INIT(src/sqlite.h.in) dnl Put the RCS revision string after AC_INIT so that it will also dnl show in in configure. # The following RCS revision string applies to configure.in -# $Revision: 1.8 $ +# $Revision: 1.9 $ ######### # Make sure we are not building in a subdirectory of the source tree. @@ -309,6 +309,21 @@ else AC_MSG_RESULT(no) fi +########## +# Are we using UTF-8 or iso8859 encodings? +# +AC_ARG_ENABLE(utf8, +[ --enable-utf8 Use UTF-8 encodings],,enable_utf8=no) +AC_MSG_CHECKING([character encoding]) +if test "$enable_utf8" = "no"; then + ENCODING=ISO8859 + AC_MSG_RESULT([iso8859]) +else + ENCODING=UTF8 + AC_MSG_RESULT([UTF-8]) +fi +AC_SUBST(ENCODING) + ########### # Lots of things are different if we are compiling for Windows using # the CYGWIN environment. 
So check for that special case and handle diff --git a/manifest b/manifest index c13a9f897c..63d24a6a70 100644 --- a/manifest +++ b/manifest @@ -1,11 +1,11 @@ -C Version\s1.0.28\s(CVS\s475) -D 2001-04-04T21:30:00 +C i18n\schanges\s(CVS\s201) +D 2001-04-05T15:57:13 F COPYRIGHT 74a8a6531a42e124df07ab5599aad63870fa0bd4 -F Makefile.in fd8815aa01a7181f60f786158b7737a35413189e +F Makefile.in 25791375ce9f4f5b57d4cab67f0d58d772c96451 F README 51f6a4e7408b34afa5bc1c0485f61b6a4efb6958 F VERSION fb0fbad3b7a52736cc18ea5fcf1bc6dba7b2c40c -F configure 3dc1edb9dcf60215e31ff72b447935ab62211442 x -F configure.in d892ca33db7e88a055519ce2f36dcb11020e8fff +F configure 260d3be664b6d9b4d2d985e66b6dae1ef723c86e x +F configure.in 6940e3f88bf3d28a10c73b06ab99fd3a7e039a61 F doc/lemon.html e233a3e97a779c7a87e1bc4528c664a58e49dd47 F doc/report1.txt 734cbae63b1310cc643fe5e9e3da1ab55a79b99e F src/TODO 38a68a489e56e9fd4a96263e0ff9404a47368ad4 @@ -25,7 +25,7 @@ F src/ex/pg.h 23a4ac807b0546ec2bb6239ec8bd3e06926572cd F src/ex/sizes.tcl f54bad4a2ac567624be59131a6ee42d71b41a3d7 F src/expr.c 745383609b65d504a2cc04ac4d9389e9c8e2bc80 F src/insert.c 4bc1cab84f7805d560a1417734a532843e30b762 -F src/main.c 5afe29c425b875acede20f609485866eb5b276f6 +F src/main.c d52a1c2a7a964acca87880ac925019c383b8e606 F src/pager.h 889c5cf517ad30704e295540793c893ac843fd5f F src/parse.y 1ba81d3b75f37ca868aa0ab990bb977fd41519eb F src/printf.c af0dc65c293427272e1949c7807b1d88f10004fd @@ -33,22 +33,22 @@ F src/random.c b36c3f57dc80c8f354e6bfbf39cf1e1de021d54a F src/select.c a6bfdaa92d4614e79bf18129283c5163faa291fc F src/shell.c c1785b4af18192056adbe894f8626a7e7bdf47aa F src/shell.tcl 27ecbd63dd88396ad16d81ab44f73e6c0ea9d20e -F src/sqlite.h.in 3b446fcbed6005f0ab89632f3356c4708b349e88 +F src/sqlite.h.in f13156b85c51a6d7d06678bbe69554c90cde1fe2 F src/sqliteInt.h 97e2dd488ab433e27eda6e26f4c84a9a2684785c F src/table.c 5be76051a8ed6f6bfa641f4adc52529efa34fbf9 -F src/tclsqlite.c f654b0399ea8a29262637dbe71fdfe7c26bd9032 +F 
src/tclsqlite.c 82eda60c7ae5cd7b71023a55c5710a74713c313b F src/tokenize.c 8fc3936eefad84f1fff19e0892ed0542eb9ac7b3 F src/update.c 8365b3922ea098330d1e20862d6e64911e4e03d0 -F src/util.c 16a7af31c23db4066b2cfdc200a4067bc13d80ab -F src/vdbe.c eec6c26547108270df5cc304d214686ebf014f4f +F src/util.c aec315b834bad444c9e0e90efd9d2eaeeb37c90c +F src/vdbe.c 5f5be704686ed328275c35815e39d041a0c6cbb6 F src/vdbe.h dc1205da434c6a9da03b5d6b089270bbc8e6d437 F src/where.c 459bf37ac7849599da400420984b3306484b4cbb F test/all.test 15cac2f6b2d4c55bf896212aff3cc9d6597b0490 F test/copy.test b77a1214bd7756f2849d5c4fa6e715c0ff0c34eb F test/dbbe.test a022fe2d983848f786e17ef1fc6809cfd37fb02c F test/delete.test 50b9b1f06c843d591741dba7869433a105360dbf -F test/expr.test 278d7524079219f3bf9df41225903c9fb8c61c19 -F test/func.test 02aed8845b98bde1043dda97455de1d37238ebb3 +F test/expr.test 1e4822af0213734dd325521a6e25fe38f1fa7f9d +F test/func.test 11c415efe9d435aa0136edd0b3cb11efe5c3ead7 F test/in.test ea48016c4fcc479d315932ae2b8568146686ffaf F test/index.test b189ac11bf8d4fbcf87402f4028c25c8a6d91bb5 F test/insert.test dbd3bd189edb61fddbe66c236694ef23352429f1 @@ -83,18 +83,18 @@ F www/arch.fig 4f246003b7da23bd63b8b0af0618afb4ee3055c8 F www/arch.png 8dae0766d42ed3de9ed013c1341a5792bcf633e6 F www/arch.tcl a40380c1fe0080c43e6cc5c20ed70731511b06be F www/c_interface.tcl 11be2d5826eb7d6efd629751d3b483c1ed78ba14 -F www/changes.tcl 9cea962625b87620cfbb2ecb0ed9a8a5e6b2cee3 +F www/changes.tcl c6c8aa0fdd02d4dbc17803aa023279e0d3809ba5 F www/crosscompile.tcl c99efacb3aefaa550c6e80d91b240f55eb9fd33e F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c F www/fileformat.tcl cfb7fba80b7275555281ba2f256c00734bcdd1c9 -F www/index.tcl e6a1fb2adfa9a881d7bee0c86c2959d1a872e7bb +F www/index.tcl 2deb0b26970999691ed98c2e1a0bb180da8e74b8 F www/lang.tcl 7fec414487ebee2cbb17c90addf5a026cd10396a F www/mingw.tcl fc5f4ba9d336b6e8c97347cc6496d6162461ef60 F www/opcode.tcl cb3a1abf8b7b9be9f3a228d097d6bf8b742c2b6f F 
www/sqlite.tcl cb0d23d8f061a80543928755ec7775da6e4f362f F www/tclsqlite.tcl 06f81c401f79a04f2c5ebfb97e7c176225c0aef2 F www/vdbe.tcl 0c8aaa529dd216ccbf7daaabd80985e413d5f9ad -P 490d08a8c19d52ebb41999371e2664e29bcb1164 -R 30ef2fcf4f63f23edf35b1bdea9bb9e1 +P 8b4c87e8cf08db8e8ace57dc8dc8d110d18f19ed +R 80c18504638b99b08ae21db4afad8915 U drh -Z c09be351e07d658e489bb28e9f15d0cc +Z 27516acdd74264a621463b7fd9bffc1d diff --git a/manifest.uuid b/manifest.uuid index 9e7fa99400..8f38c635d9 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8b4c87e8cf08db8e8ace57dc8dc8d110d18f19ed \ No newline at end of file +8390f6521af0f1c5cd0298cc4a1dfa3f092c1e15 \ No newline at end of file diff --git a/src/main.c b/src/main.c index 4205b70bed..6a024c620a 100644 --- a/src/main.c +++ b/src/main.c @@ -26,7 +26,7 @@ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. ** -** $Id: main.c,v 1.25 2001/02/11 16:56:24 drh Exp $ +** $Id: main.c,v 1.26 2001/04/05 15:57:13 drh Exp $ */ #include "sqliteInt.h" #include @@ -190,6 +190,16 @@ static int sqliteInit(sqlite *db, char **pzErrMsg){ */ const char sqlite_version[] = SQLITE_VERSION; +/* +** Does the library expect data to be encoded as UTF-8 or iso8859? The +** following global constant always lets us know. +*/ +#ifdef SQLITE_UTF8 +char sqlite_encoding[] = "UTF-8"; +#else +char sqlite_encoding[] = "iso8859"; +#endif + /* ** Open a new SQLite database. Construct an "sqlite" structure to define ** the state of this database and return a pointer to that structure. diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 59332dcf92..acb67e9aaa 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -24,7 +24,7 @@ ** This header file defines the interface that the sqlite library ** presents to client programs. 
** -** @(#) $Id: sqlite.h.in,v 1.10 2001/04/03 16:53:22 drh Exp $ +** @(#) $Id: sqlite.h.in,v 1.11 2001/04/05 15:57:13 drh Exp $ */ #ifndef _SQLITE_H_ #define _SQLITE_H_ @@ -42,6 +42,21 @@ */ extern const char sqlite_version[]; +/* +** The SQLITE_UTF8 macro is defined if the library expects to see +** UTF-8 encoded data. The SQLITE_ISO8859 macro is defined if the +** iso8859 encoded should be used. +*/ +#define SQLITE_--ENCODING-- 1 + +/* +** The following constant holds one of two strings, "UTF-8" or "iso8859", +** depending on which character encoding the SQLite library expects to +** see. The character encoding makes a difference for the LIKE and GLOB +** operators and for the LENGTH() and SUBSTR() functions. +*/ +extern char sqlite_encoding[]; + /* ** Each open sqlite database is represented by an instance of the ** following opaque structure. diff --git a/src/tclsqlite.c b/src/tclsqlite.c index c880524220..6cbb827b51 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -23,7 +23,7 @@ ************************************************************************* ** A TCL Interface to SQLite ** -** $Id: tclsqlite.c,v 1.14 2001/04/03 16:53:22 drh Exp $ +** $Id: tclsqlite.c,v 1.15 2001/04/05 15:57:13 drh Exp $ */ #ifndef NO_TCL /* Omit this whole file if TCL is unavailable */ @@ -56,6 +56,16 @@ struct CallbackData { int tcl_rc; /* Return code from TCL script */ }; +/* +** If TCL uses UTF-8 and SQLite is configured to use iso8859, then we +** have to do a translation when going between the two. Set the +** UTF_TRANSLATION_NEEDED macro to indicate that we need to do +** this translation. +*/ +#if defined(TCL_UTF_MAX) && !defined(SQLITE_UTF8) +# define UTF_TRANSLATION_NEEDED 1 +#endif + /* ** Called for each row of the result. 
*/ @@ -67,6 +77,9 @@ static int DbEvalCallback( ){ CallbackData *cbData = (CallbackData*)clientData; int i, rc; +#ifdef UTF_TRANSLATION_NEEDED + Tcl_DString dCol; +#endif if( cbData->zArray[0] ){ if( cbData->once ){ Tcl_SetVar2(cbData->interp, cbData->zArray, "*", "", 0); @@ -78,13 +91,28 @@ static int DbEvalCallback( for(i=0; iinterp, cbData->zArray, azN[i], + Tcl_DStringValue(&dCol), 0); + Tcl_DStringFree(&dCol); +#else Tcl_SetVar2(cbData->interp, cbData->zArray, azN[i], z, 0); +#endif } }else{ for(i=0; iinterp, azN[i], Tcl_DStringValue(&dCol), 0); + Tcl_DStringFree(&dCol); +#else Tcl_SetVar(cbData->interp, azN[i], z, 0); +#endif } } cbData->once = 0; @@ -111,7 +139,15 @@ static int DbEvalCallback2( for(i=0; iinterp = interp; zSql = Tcl_GetStringFromObj(objv[2], 0); +#ifdef UTF_TRANSLATION_NEEDED + Tcl_DStringInit(&dSql); + Tcl_UtfToExternalDString(NULL, zSql, -1, &dSql); + zSql = Tcl_DStringValue(&dSql); +#endif Tcl_IncrRefCount(objv[2]); if( objc==5 ){ cbData.interp = interp; @@ -303,6 +347,9 @@ static int DbObjCmd(void *cd, Tcl_Interp *interp, int objc,Tcl_Obj *const*objv){ rc = cbData.tcl_rc; } Tcl_DecrRefCount(objv[2]); +#ifdef UTF_TRANSLATION_NEEDED + Tcl_DStringFree(&dSql); +#endif return rc; } @@ -382,6 +429,7 @@ static int DbMain(void *cd, Tcl_Interp *interp, int argc, char **argv){ */ int Sqlite_Init(Tcl_Interp *interp){ Tcl_CreateCommand(interp, "sqlite", DbMain, 0, 0); + Tcl_SetVar(interp,"sqlite_encoding",sqlite_encoding,TCL_GLOBAL_ONLY); Tcl_PkgProvide(interp, "sqlite", "1.0"); return TCL_OK; } @@ -430,6 +478,7 @@ static char zMainloop[] = #define TCLSH_MAIN main /* Needed to fake out mktclapp */ int TCLSH_MAIN(int argc, char **argv){ Tcl_Interp *interp; + Tcl_FindExecutable(argv[0]); interp = Tcl_CreateInterp(); Sqlite_Init(interp); if( argc>=2 ){ diff --git a/src/util.c b/src/util.c index 9d45e1b4d0..6eb9c235a0 100644 --- a/src/util.c +++ b/src/util.c @@ -26,7 +26,7 @@ ** This file contains functions for allocating memory, comparing ** strings, 
and stuff like that. ** -** $Id: util.c,v 1.19 2001/04/04 21:10:19 drh Exp $ +** $Id: util.c,v 1.20 2001/04/05 15:57:13 drh Exp $ */ #include "sqliteInt.h" #include @@ -725,91 +725,60 @@ int sqliteSortCompare(const char *a, const char *b){ return res; } +#ifdef SQLITE_UTF8 /* -** When the first byte of a UTF-8 character is used as the -** index of the following array, then the value is the number -** of bytes in the whole UTF-8 character. This matrix assumes -** a well-formed UTF-8 string. All bets are off if the input -** is not well-formed. +** X is a pointer to the first byte of a UTF-8 character. Increment +** X so that it points to the next character. This only works right +** if X points to a well-formed UTF-8 string. */ -static const unsigned char utf8_width[] = { - /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ -/* 0x */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 1x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 2x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 3x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 4x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 6x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 8x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 9x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* Ax */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* Bx */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Ex */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -/* Fx */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, -}; +#define sqliteNextChar(X) while( (0xc0&*++(X))==0x80 ){} +#define sqliteCharVal(X) sqlite_utf8_to_int(X) +#else /* !defined(SQLITE_UTF8) */ /* -** This routine computes the number of bytes to the start of the -** next UTF-8 
character. We could just do -** -** z += utf8_width[*z] -** -** accomplish the same thing, if we know that z was a well-formed -** UTF-8 string. If it is not, then z might be incremented past -** its null terminator. This function, though slower, will never -** increment z past its terminator. +** For iso8859 encoding, the next character is just the next byte. */ -static int utf8_char_size(const unsigned char *z){ - int i, n = utf8_width[*z]; - for(i=1; i 0 ){ - c = (c<<6) | (0x3f & *(z++)); + static const int initVal[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 0, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 254, + 255, + }; + c = initVal[*(z++)]; + while( (0xc0&*z)==0x80 ){ + c = (c<<6) | (0x3f&*(z++)); } return c; } +#endif /* ** Compare two UTF-8 strings for equality where the first string can @@ -852,7 +821,7 @@ sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){ while( (c=zPattern[1]) == '*' || c == '?' ){ if( c=='?' 
){ if( *zString==0 ) return 0; - zString += utf8_char_size(zString); + sqliteNextChar(zString); } zPattern++; } @@ -860,7 +829,7 @@ sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){ c = UpperToLower[c]; if( c=='[' ){ while( *zString && sqliteGlobCompare(&zPattern[1],zString)==0 ){ - zString += utf8_char_size(zString); + sqliteNextChar(zString); } return *zString!=0; }else{ @@ -868,13 +837,13 @@ sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){ while( c2 != 0 && c2 != c ){ c2 = *++zString; } if( c2==0 ) return 0; if( sqliteGlobCompare(&zPattern[1],zString) ) return 1; - zString += utf8_char_size(zString); + sqliteNextChar(zString); } return 0; } case '?': { if( *zString==0 ) return 0; - zString += utf8_char_size(zString); + sqliteNextChar(zString); zPattern++; break; } @@ -882,7 +851,7 @@ sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){ int prior_c = 0; seen = 0; invert = 0; - c = utf8_to_int(zString); + c = sqliteCharVal(zString); if( c==0 ) return 0; c2 = *++zPattern; if( c2=='^' ){ invert = 1; c2 = *++zPattern; } @@ -890,10 +859,10 @@ sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){ if( c==']' ) seen = 1; c2 = *++zPattern; } - while( (c2 = utf8_to_int(zPattern))!=0 && c2!=']' ){ + while( (c2 = sqliteCharVal(zPattern))!=0 && c2!=']' ){ if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 && prior_c>0 ){ zPattern++; - c2 = utf8_to_int(zPattern); + c2 = sqliteCharVal(zPattern); if( c>=prior_c && c<=c2 ) seen = 1; prior_c = 0; }else if( c==c2 ){ @@ -902,10 +871,10 @@ sqliteGlobCompare(const unsigned char *zPattern, const unsigned char *zString){ }else{ prior_c = c2; } - zPattern += utf8_char_size(zPattern); + sqliteNextChar(zPattern); } if( c2==0 || (seen ^ invert)==0 ) return 0; - zString += utf8_char_size(zString); + sqliteNextChar(zString); zPattern++; break; } @@ -940,7 +909,7 @@ sqliteLikeCompare(const unsigned char *zPattern, const unsigned 
char *zString){ while( (c=zPattern[1]) == '%' || c == '_' ){ if( c=='_' ){ if( *zString==0 ) return 0; - zString += utf8_char_size(zString); + sqliteNextChar(zString); } zPattern++; } @@ -950,13 +919,13 @@ sqliteLikeCompare(const unsigned char *zPattern, const unsigned char *zString){ while( c2 != 0 && c2 != c ){ c2 = UpperToLower[*++zString]; } if( c2==0 ) return 0; if( sqliteLikeCompare(&zPattern[1],zString) ) return 1; - zString += utf8_char_size(zString); + sqliteNextChar(zString); } return 0; } case '_': { if( *zString==0 ) return 0; - zString += utf8_char_size(zString); + sqliteNextChar(zString); zPattern++; break; } diff --git a/src/vdbe.c b/src/vdbe.c index 61e28fbe03..8c0524582a 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -41,7 +41,7 @@ ** But other routines are also provided to help in building up ** a program instruction by instruction. ** -** $Id: vdbe.c,v 1.55 2001/04/04 21:22:14 drh Exp $ +** $Id: vdbe.c,v 1.56 2001/04/05 15:57:13 drh Exp $ */ #include "sqliteInt.h" #include @@ -3284,7 +3284,7 @@ int sqliteVdbeExec( break; } - /* Opcode: Length * * * + /* Opcode: Strlen * * * ** ** Interpret the top of the stack as a string. Replace the top of ** stack with an integer which is the length of the string. @@ -3294,7 +3294,14 @@ int sqliteVdbeExec( int len; VERIFY( if( tos<0 ) goto not_enough_stack; ) Stringify(p, tos); +#ifdef SQLITE_UTF8 + { + char *z = zStack[tos]; + for(len=0; *z; z++){ if( (0xc0&*z)!=0x80 ) len++; } + } +#else len = aStack[tos].n-1; +#endif POPSTACK; p->tos++; aStack[tos].i = len; @@ -3345,7 +3352,18 @@ int sqliteVdbeExec( } VERIFY( if( p->tos<0 ) goto not_enough_stack; ) Stringify(p, p->tos); + + /* "n" will be the number of characters in the input string. + ** For iso8859, the number of characters is the number of bytes. + ** But for UTF-8, some characters can use multiple bytes and the + ** situation is more complex. 
+ */ +#ifdef SQLITE_UTF8 + z = zStack[p->tos]; + for(n=0; *z; z++){ if( (0xc0&*z)!=0x80 ) n++; } +#else n = aStack[p->tos].n - 1; +#endif if( start<0 ){ start += n + 1; if( start<0 ){ @@ -3360,6 +3378,27 @@ int sqliteVdbeExec( if( cnt > n ){ cnt = n; } + + /* At this point, "start" is the index of the first character to + ** extract and "cnt" is the number of characters to extract. We + ** need to convert units on these variables from characters into + ** bytes. For iso8859, the conversion is a no-op, but for UTF-8 + ** we have to do a little work. + */ +#ifdef SQLITE_UTF8 + { + int c_start = start; + int c_cnt = cnt; + int i; + z = zStack[p->tos]; + for(start=i=0; itos][start], cnt); diff --git a/test/expr.test b/test/expr.test index 76a4706bfe..19bcf4e620 100644 --- a/test/expr.test +++ b/test/expr.test @@ -23,7 +23,7 @@ # This file implements regression tests for SQLite library. The # focus of this file is testing expressions. # -# $Id: expr.test,v 1.11 2001/04/04 21:10:19 drh Exp $ +# $Id: expr.test,v 1.12 2001/04/05 15:57:14 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -160,20 +160,32 @@ test_expr expr-5.11 {t1='abc', t2='xyz'} {t1 NOT LIKE t2} 1 test_expr expr-5.12 {t1='abc', t2='ABC'} {t1 NOT LIKE t2} 0 # The following tests only work on versions of TCL that support -# Unicode. +# Unicode and SQLite configured for UTF-8 support. 
# -test_expr expr-5.13 "t1='a\u0080c', t2='A_C'" {t1 LIKE t2} 1 -test_expr expr-5.14 "t1='a\u07FFc', t2='A_C'" {t1 LIKE t2} 1 -test_expr expr-5.15 "t1='a\u0800c', t2='A_C'" {t1 LIKE t2} 1 -test_expr expr-5.16 "t1='a\uFFFFc', t2='A_C'" {t1 LIKE t2} 1 -test_expr expr-5.17 "t1='a\u0080', t2='A__'" {t1 LIKE t2} 0 -test_expr expr-5.18 "t1='a\u07FF', t2='A__'" {t1 LIKE t2} 0 -test_expr expr-5.19 "t1='a\u0800', t2='A__'" {t1 LIKE t2} 0 -test_expr expr-5.20 "t1='a\uFFFF', t2='A__'" {t1 LIKE t2} 0 -test_expr expr-5.21 "t1='ax\uABCD', t2='A_\uABCD'" {t1 LIKE t2} 1 -test_expr expr-5.22 "t1='ax\u1234', t2='A%\u1234'" {t1 LIKE t2} 1 -test_expr expr-5.23 "t1='ax\uFEDC', t2='A_%'" {t1 LIKE t2} 1 -test_expr expr-5.24 "t1='ax\uFEDCy\uFEDC', t2='A%\uFEDC'" {t1 LIKE t2} 1 +if {"\u1234"!="u1234" && $::sqlite_encoding=="UTF-8"} { + test_expr expr-5.13 "t1='a\u0080c', t2='A_C'" {t1 LIKE t2} 1 + test_expr expr-5.14 "t1='a\u07FFc', t2='A_C'" {t1 LIKE t2} 1 + test_expr expr-5.15 "t1='a\u0800c', t2='A_C'" {t1 LIKE t2} 1 + test_expr expr-5.16 "t1='a\uFFFFc', t2='A_C'" {t1 LIKE t2} 1 + test_expr expr-5.17 "t1='a\u0080', t2='A__'" {t1 LIKE t2} 0 + test_expr expr-5.18 "t1='a\u07FF', t2='A__'" {t1 LIKE t2} 0 + test_expr expr-5.19 "t1='a\u0800', t2='A__'" {t1 LIKE t2} 0 + test_expr expr-5.20 "t1='a\uFFFF', t2='A__'" {t1 LIKE t2} 0 + test_expr expr-5.21 "t1='ax\uABCD', t2='A_\uABCD'" {t1 LIKE t2} 1 + test_expr expr-5.22 "t1='ax\u1234', t2='A%\u1234'" {t1 LIKE t2} 1 + test_expr expr-5.23 "t1='ax\uFEDC', t2='A_%'" {t1 LIKE t2} 1 + test_expr expr-5.24 "t1='ax\uFEDCy\uFEDC', t2='A%\uFEDC'" {t1 LIKE t2} 1 +} + +# These tests are for when SQLite assumes iso8859 characters. 
+# +if {$::sqlite_encoding=="iso8859"} { + catch {encoding system iso8859-1} + test_expr expr-5.50 "t1='a\266c', t2='A_C'" {t1 LIKE t2} 1 + test_expr expr-5.51 "t1='a\347', t2='A_'" {t1 LIKE t2} 1 + test_expr expr-5.52 "t1='ax\351', t2='A_\351'" {t1 LIKE t2} 1 + test_expr expr-5.53 "t1='ax\241', t2='A_%'" {t1 LIKE t2} 1 +} test_expr expr-6.1 {t1='abc', t2='xyz'} {t1 GLOB t2} 0 test_expr expr-6.2 {t1='abc', t2='ABC'} {t1 GLOB t2} 0 @@ -203,23 +215,43 @@ test_expr expr-6.25 {t1='ac', t2='a*?c'} {t1 GLOB t2} 0 # These tests only work on versions of TCL that support Unicode # -test_expr expr-6.26 "t1='a\u0080c', t2='a?c'" {t1 GLOB t2} 1 -test_expr expr-6.27 "t1='a\u07ffc', t2='a?c'" {t1 GLOB t2} 1 -test_expr expr-6.28 "t1='a\u0800c', t2='a?c'" {t1 GLOB t2} 1 -test_expr expr-6.29 "t1='a\uffffc', t2='a?c'" {t1 GLOB t2} 1 -test_expr expr-6.30 "t1='a\u1234', t2='a?'" {t1 GLOB t2} 1 -test_expr expr-6.31 "t1='a\u1234', t2='a??'" {t1 GLOB t2} 0 -test_expr expr-6.32 "t1='ax\u1234', t2='a?\u1234'" {t1 GLOB t2} 1 -test_expr expr-6.33 "t1='ax\u1234', t2='a*\u1234'" {t1 GLOB t2} 1 -test_expr expr-6.34 "t1='ax\u1234y\u1234', t2='a*\u1234'" {t1 GLOB t2} 1 -test_expr expr-6.35 "t1='a\u1234b', t2='a\[x\u1234y\]b'" {t1 GLOB t2} 1 -test_expr expr-6.36 "t1='a\u1234b', t2='a\[\u1233-\u1235\]b'" {t1 GLOB t2} 1 -test_expr expr-6.37 "t1='a\u1234b', t2='a\[\u1234-\u124f\]b'" {t1 GLOB t2} 1 -test_expr expr-6.38 "t1='a\u1234b', t2='a\[\u1235-\u124f\]b'" {t1 GLOB t2} 0 -test_expr expr-6.39 "t1='a\u1234b', t2='a\[a-\u1235\]b'" {t1 GLOB t2} 1 -test_expr expr-6.40 "t1='a\u1234b', t2='a\[a-\u1234\]b'" {t1 GLOB t2} 1 -test_expr expr-6.41 "t1='a\u1234b', t2='a\[a-\u1233\]b'" {t1 GLOB t2} 0 +if {"\u1234"!="u1234" && $::sqlite_encoding=="UTF-8"} { + test_expr expr-6.26 "t1='a\u0080c', t2='a?c'" {t1 GLOB t2} 1 + test_expr expr-6.27 "t1='a\u07ffc', t2='a?c'" {t1 GLOB t2} 1 + test_expr expr-6.28 "t1='a\u0800c', t2='a?c'" {t1 GLOB t2} 1 + test_expr expr-6.29 "t1='a\uffffc', t2='a?c'" {t1 GLOB t2} 1 + 
test_expr expr-6.30 "t1='a\u1234', t2='a?'" {t1 GLOB t2} 1 + test_expr expr-6.31 "t1='a\u1234', t2='a??'" {t1 GLOB t2} 0 + test_expr expr-6.32 "t1='ax\u1234', t2='a?\u1234'" {t1 GLOB t2} 1 + test_expr expr-6.33 "t1='ax\u1234', t2='a*\u1234'" {t1 GLOB t2} 1 + test_expr expr-6.34 "t1='ax\u1234y\u1234', t2='a*\u1234'" {t1 GLOB t2} 1 + test_expr expr-6.35 "t1='a\u1234b', t2='a\[x\u1234y\]b'" {t1 GLOB t2} 1 + test_expr expr-6.36 "t1='a\u1234b', t2='a\[\u1233-\u1235\]b'" {t1 GLOB t2} 1 + test_expr expr-6.37 "t1='a\u1234b', t2='a\[\u1234-\u124f\]b'" {t1 GLOB t2} 1 + test_expr expr-6.38 "t1='a\u1234b', t2='a\[\u1235-\u124f\]b'" {t1 GLOB t2} 0 + test_expr expr-6.39 "t1='a\u1234b', t2='a\[a-\u1235\]b'" {t1 GLOB t2} 1 + test_expr expr-6.40 "t1='a\u1234b', t2='a\[a-\u1234\]b'" {t1 GLOB t2} 1 + test_expr expr-6.41 "t1='a\u1234b', t2='a\[a-\u1233\]b'" {t1 GLOB t2} 0 +} +# These tests are for when SQLite assumes iso8859 characters. +# +if {$::sqlite_encoding=="iso8859"} { + catch {encoding system iso8859-1} + test_expr expr-6.50 "t1='a\266c', t2='a?c'" {t1 GLOB t2} 1 + test_expr expr-6.51 "t1='a\266', t2='a?'" {t1 GLOB t2} 1 + test_expr expr-6.52 "t1='a\266', t2='a??'" {t1 GLOB t2} 0 + test_expr expr-6.53 "t1='ax\266', t2='a??'" {t1 GLOB t2} 1 + test_expr expr-6.54 "t1='ax\266', t2='a?\266'" {t1 GLOB t2} 1 + test_expr expr-6.55 "t1='ax\266y\266', t2='a*\266'" {t1 GLOB t2} 1 + test_expr expr-6.56 "t1='a\266b', t2='a\[x\266y\]b'" {t1 GLOB t2} 1 + test_expr expr-6.57 "t1='a\266b', t2='a\[\260-\270\]b'" {t1 GLOB t2} 1 + test_expr expr-6.58 "t1='a\266b', t2='a\[\266-\270\]b'" {t1 GLOB t2} 1 + test_expr expr-6.59 "t1='a\266b', t2='a\[\267-\270\]b'" {t1 GLOB t2} 0 + test_expr expr-6.60 "t1='a\266b', t2='a\[x-\267\]b'" {t1 GLOB t2} 1 + test_expr expr-6.61 "t1='a\266b', t2='a\[x-\266\]b'" {t1 GLOB t2} 1 + test_expr expr-6.62 "t1='a\266b', t2='a\[x-\265\]b'" {t1 GLOB t2} 0 +} # The sqliteExprIfFalse and sqliteExprIfTrue routines are only # executed as part of a WHERE clause. 
Create a table suitable diff --git a/test/func.test b/test/func.test index 52d7db25bf..26a69e667e 100644 --- a/test/func.test +++ b/test/func.test @@ -23,21 +23,18 @@ # This file implements regression tests for SQLite library. The # focus of this file is testing built-in functions. # -# $Id: func.test,v 1.1 2000/08/28 16:22:00 drh Exp $ +# $Id: func.test,v 1.2 2001/04/05 15:57:14 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl # Create a table to work with. # -execsql {CREATE TABLE tbl1(t1 text)} -foreach word {this program is free software} { - execsql "INSERT INTO tbl1 VALUES('$word')" -} - -# Make sure the table was created properly. -# do_test func-0.0 { + execsql {CREATE TABLE tbl1(t1 text)} + foreach word {this program is free software} { + execsql "INSERT INTO tbl1 VALUES('$word')" + } execsql {SELECT t1 FROM tbl1 ORDER BY t1} } {free is program software this} @@ -89,4 +86,51 @@ do_test func-2.8 { execsql {SELECT t1 FROM tbl1 ORDER BY substr(t1,2,20)} } {this software free program is} +# Only do the following tests if TCL has UTF-8 capabilities and +# the UTF-8 encoding is turned on in the SQLite library. 
+# +if {$::sqlite_encoding=="UTF-8" && "\u1234"!="u1234"} { + +# Put some UTF-8 characters in the database +# +do_test func-3.0 { + execsql {DELETE FROM tbl1} + foreach word "contains UTF-8 characters hi\u1234ho" { + execsql "INSERT INTO tbl1 VALUES('$word')" + } + execsql {SELECT t1 FROM tbl1 ORDER BY t1} +} "characters contains hi\u1234ho UTF-8" +do_test func-3.1 { + execsql {SELECT length(t1) FROM tbl1 ORDER BY t1} +} {10 8 5 5} +do_test func-3.2 { + execsql {SELECT substr(t1,1,2) FROM tbl1 ORDER BY t1} +} {ch co hi UT} +do_test func-3.3 { + execsql {SELECT substr(t1,1,3) FROM tbl1 ORDER BY t1} +} "cha con hi\u1234 UTF" +do_test func-3.4 { + execsql {SELECT substr(t1,2,2) FROM tbl1 ORDER BY t1} +} "ha on i\u1234 TF" +do_test func-3.5 { + execsql {SELECT substr(t1,2,3) FROM tbl1 ORDER BY t1} +} "har ont i\u1234h TF-" +do_test func-3.6 { + execsql {SELECT substr(t1,3,2) FROM tbl1 ORDER BY t1} +} "ar nt \u1234h F-" +do_test func-3.7 { + execsql {SELECT substr(t1,4,2) FROM tbl1 ORDER BY t1} +} "ra ta ho -8" +do_test func-3.8 { + execsql {SELECT substr(t1,-1,1) FROM tbl1 ORDER BY t1} +} "s s o 8" +do_test func-3.9 { + execsql {SELECT substr(t1,-3,2) FROM tbl1 ORDER BY t1} +} "er in \u1234h F-" +do_test func-3.10 { + execsql {SELECT substr(t1,-4,3) FROM tbl1 ORDER BY t1} +} "ter ain i\u1234h TF-" + +} ;# End sqlite_encoding==UTF-8 and \u1234!=u1234 + finish_test diff --git a/www/changes.tcl b/www/changes.tcl index 6298f333d1..5927ce52e6 100644 --- a/www/changes.tcl +++ b/www/changes.tcl @@ -19,9 +19,10 @@ proc chng {date desc} { chng {2001 Apr 5 (1.0.29)} {
  • The LIKE and GLOB operators now assume both operands are - UTF-8 strings. - ** This change could potentially - break existing code **
  • + UTF-8 strings if the library is configured with the "--enable-utf8" + option. If not configured for UTF-8 but using a version of TCL + that supports UTF-8, then a conversion from UTF-8 to iso8859 and + back again is done inside the TCL interface. } chng {2001 Apr 4 (1.0.28)} { diff --git a/www/index.tcl b/www/index.tcl index e0aa19fc53..ecd042210f 100644 --- a/www/index.tcl +++ b/www/index.tcl @@ -1,7 +1,7 @@ # # Run this TCL script to generate HTML for the index.html file. # -set rcsid {$Id: index.tcl,v 1.34 2001/04/04 21:10:19 drh Exp $} +set rcsid {$Id: index.tcl,v 1.35 2001/04/05 15:57:14 drh Exp $} puts { SQLite: An SQL Database Library Built Atop GDBM @@ -62,13 +62,6 @@ all code except for a few areas which are unreachable or which are only reached when malloc() fails. The code has been tested for memory leaks and is found to be clean.

    -

    Important Note: Beginning with version 1.0.29, the LIKE and -GLOB operators assume both operands are UTF-8 strings. Prior to that, -both operators assumed plain ASCII strings. Users of earlier versions -of SQLite that invoke LIKE or GLOB to compare strings containing -characters greater than 127 may have problems when they upgrade to -version 1.0.29 or later.

    -

    Important Note: Serious bugs have been found in versions 1.0.22 on Unix and 1.0.26 on Windows. Users of these or earlier versions of SQLite should upgrade.