git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_scrub: transition from libunistring to libicu for Unicode processing
author: Darrick J. Wong <darrick.wong@oracle.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
committer: Eric Sandeen <sandeen@redhat.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
Move off of libunistring and onto libicu for Unicode name scanning.
This will make it easy to warn about Unicode code points that do not
belong in identifiers (directional overrides, zero width elements) and
warn about names that could render similarly enough to cause confusion.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Acked-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
configure.ac
debian/control
include/builddefs.in
m4/Makefile
m4/package_icu.m4 [new file with mode: 0644]
m4/package_unistring.m4 [deleted file]
scrub/Makefile
scrub/unicrash.c
scrub/unicrash.h

index 686bf789bdffc1ad1646b5a65b184820f2147fb1..1885c4569d02ce997a4993678aed1a86aa3106f2 100644 (file)
@@ -95,6 +95,11 @@ AC_ARG_ENABLE(lto,
        enable_lto=probe)
 AC_SUBST(enable_lto)
 
+# Enable libicu for xfs_scrubbing of malicious unicode sequences in names
+AC_ARG_ENABLE(libicu,
+[ --enable-libicu=[yes/no]   Enable Unicode name scanning (libicu) [default=probe]],,
+       enable_libicu=probe)
+
 #
 # If the user specified a libdir ending in lib64 do not append another
 # 64 to the library names.
@@ -173,8 +178,12 @@ AC_HAVE_DEVMAPPER
 AC_HAVE_MALLINFO
 AC_PACKAGE_WANT_ATTRIBUTES_H
 AC_HAVE_LIBATTR
-AC_PACKAGE_WANT_UNINORM_H
-AC_HAVE_U8NORMALIZE
+if test "$enable_libicu" = "yes" || test "$enable_libicu" = "probe"; then
+       AC_HAVE_LIBICU
+fi
+if test "$enable_libicu" = "yes" && test "$have_libicu" != "yes"; then
+        AC_MSG_ERROR([libicu not found.])
+fi
 AC_HAVE_OPENAT
 AC_HAVE_FSTATAT
 AC_HAVE_SG_IO
index 2937c99198e919df01aa7dc80993b0b9d4209582..f4f807b0d043744005237c9d7e559ff661fb38ca 100644 (file)
@@ -3,7 +3,7 @@ Section: admin
 Priority: optional
 Maintainer: XFS Development Team <linux-xfs@vger.kernel.org>
 Uploaders: Nathan Scott <nathans@debian.org>, Anibal Monsalve Salazar <anibal@debian.org>
-Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libunistring-dev, dh-python, pkg-config
+Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libicu-dev, dh-python, pkg-config
 Standards-Version: 4.0.0
 Homepage: https://xfs.wiki.kernel.org/
 
index 7a2a6268671789a3b162cfabbd565c782cd554c4..8aac06cf90dc7c966e876a57f836ccc47a2a6859 100644 (file)
@@ -36,7 +36,6 @@ LIBEDITLINE = @libeditline@
 LIBREADLINE = @libreadline@
 LIBBLKID = @libblkid@
 LIBDEVMAPPER = @libdevmapper@
-LIBUNISTRING = @libunistring@
 LIBXFS = $(TOPDIR)/libxfs/libxfs.la
 LIBFROG = $(TOPDIR)/libfrog/libfrog.la
 LIBXCMD = $(TOPDIR)/libxcmd/libxcmd.la
@@ -122,7 +121,7 @@ HAVE_MAP_SYNC = @have_map_sync@
 HAVE_DEVMAPPER = @have_devmapper@
 HAVE_MALLINFO = @have_mallinfo@
 HAVE_LIBATTR = @have_libattr@
-HAVE_U8NORMALIZE = @have_u8normalize@
+HAVE_LIBICU = @have_libicu@
 HAVE_OPENAT = @have_openat@
 HAVE_FSTATAT = @have_fstatat@
 HAVE_SG_IO = @have_sg_io@
@@ -173,6 +172,9 @@ ifeq ($(HAVE_GETFSMAP),yes)
 PCFLAGS+= -DHAVE_GETFSMAP
 endif
 
+LIBICU_LIBS = @libicu_LIBS@
+LIBICU_CFLAGS = @libicu_CFLAGS@
+
 SANITIZER_CFLAGS += @addrsan_cflags@ @threadsan_cflags@ @ubsan_cflags@
 SANITIZER_LDFLAGS += @addrsan_ldflags@ @threadsan_ldflags@ @ubsan_ldflags@
 
index a6d11e9704ff03992d8b56801f092059e4569a72..cf0ce60bd39f74b34ad570363cbd3db1c8cbd615 100644 (file)
@@ -23,7 +23,7 @@ LSRCFILES = \
        package_sanitizer.m4 \
        package_services.m4 \
        package_types.m4 \
-       package_unistring.m4 \
+       package_icu.m4 \
        package_utilies.m4 \
        package_uuiddev.m4 \
        multilib.m4 \
diff --git a/m4/package_icu.m4 b/m4/package_icu.m4
new file mode 100644 (file)
index 0000000..3ccbe0c
--- /dev/null
@@ -0,0 +1,6 @@
+AC_DEFUN([AC_HAVE_LIBICU],
+  [ PKG_CHECK_MODULES([libicu], [icu-i18n], [have_libicu=yes], [have_libicu=no])
+    AC_SUBST(have_libicu)
+    AC_SUBST(libicu_CFLAGS)
+    AC_SUBST(libicu_LIBS)
+  ])
diff --git a/m4/package_unistring.m4 b/m4/package_unistring.m4
deleted file mode 100644 (file)
index 9cbfcb0..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-AC_DEFUN([AC_PACKAGE_WANT_UNINORM_H],
-  [ AC_CHECK_HEADERS(uninorm.h)
-    if test $ac_cv_header_uninorm_h = no; then
-       AC_CHECK_HEADERS(uninorm.h,, [
-       echo
-       echo 'WARNING: could not find a valid uninorm.h header.'])
-    fi
-  ])
-
-AC_DEFUN([AC_HAVE_U8NORMALIZE],
-  [ AC_CHECK_LIB(unistring, u8_normalize,[
-       libunistring=-lunistring
-       have_u8normalize=yes
-    ],[
-       echo
-       echo 'WARNING: xfs_scrub will not be built with Unicode libraries.'])
-    AC_SUBST(libunistring)
-    AC_SUBST(have_u8normalize)
-  ])
index 063279400262becf1db5acfd1eef62972a625154..bcc05a0ed994ef04cbe34ebde368403fc4a070d4 100644 (file)
@@ -68,8 +68,8 @@ spacemap.c \
 vfs.c \
 xfs_scrub.c
 
-LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBUNISTRING) $(LIBRT)
-LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG) $(LIBUNISTRING) $(LIBRT)
+LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBICU_LIBS) $(LIBRT)
+LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG)
 LLDFLAGS = -static
 
 ifeq ($(HAVE_MALLINFO),yes)
@@ -84,9 +84,9 @@ ifeq ($(HAVE_LIBATTR),yes)
 LCFLAGS += -DHAVE_LIBATTR
 endif
 
-ifeq ($(HAVE_U8NORMALIZE),yes)
+ifeq ($(HAVE_LIBICU),yes)
 CFILES += unicrash.c
-LCFLAGS += -DHAVE_U8NORMALIZE
+LCFLAGS += -DHAVE_LIBICU $(LIBICU_CFLAGS)
 endif
 
 ifeq ($(HAVE_SG_IO),yes)
index 51da32c2eb4f3245dc0c4e52c18426e89d880c82..06ccadf0ffa7ebab158fe19c9cc1cccfdc89e935 100644 (file)
@@ -23,8 +23,9 @@
 #include <dirent.h>
 #include <sys/types.h>
 #include <sys/statvfs.h>
-#include <unistr.h>
-#include <uninorm.h>
+#include <strings.h>
+#include <unicode/ustring.h>
+#include <unicode/unorm2.h>
 #include "path.h"
 #include "xfs_scrub.h"
 #include "common.h"
@@ -63,7 +64,7 @@ struct name_entry {
        struct name_entry       *next;
 
        /* NFKC normalized name */
-       uint8_t                 *normstr;
+       UChar                   *normstr;
        size_t                  normstrlen;
 
        xfs_ino_t               ino;
@@ -77,6 +78,7 @@ struct name_entry {
 
 struct unicrash {
        struct scrub_ctx        *ctx;
+       const UNormalizer2      *normalizer;
        bool                    compare_ino;
        size_t                  nr_buckets;
        struct name_entry       *buckets[0];
@@ -135,23 +137,48 @@ name_entry_compute_checknames(
        struct unicrash         *uc,
        struct name_entry       *entry)
 {
-       uint8_t                 *normstr;
-       size_t                  normstrlen;
-
-       normstrlen = (entry->namelen * 2) + 1;
-       normstr = calloc(normstrlen, sizeof(uint8_t));
-       if (!normstr)
+       UChar                   *normstr;
+       UChar                   *unistr;
+       int32_t                 normstrlen;
+       int32_t                 unistrlen;
+       UErrorCode              uerr = U_ZERO_ERROR;
+
+       /* Convert bytestr to unistr for normalization */
+       u_strFromUTF8(NULL, 0, &unistrlen, entry->name, entry->namelen, &uerr);
+       if (uerr != U_BUFFER_OVERFLOW_ERROR)
                return false;
-
-       if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
-                       entry->namelen, normstr, &normstrlen));
+       uerr = U_ZERO_ERROR;
+       unistr = calloc(unistrlen + 1, sizeof(UChar));
+       if (!unistr)
+               return false;
+       u_strFromUTF8(unistr, unistrlen, NULL, entry->name, entry->namelen,
+                       &uerr);
+       if (U_FAILURE(uerr))
+               goto out_unistr;
+
+       /* Normalize the string. */
+       normstrlen = unorm2_normalize(uc->normalizer, unistr, unistrlen, NULL,
+                       0, &uerr);
+       if (uerr != U_BUFFER_OVERFLOW_ERROR)
+               goto out_unistr;
+       uerr = U_ZERO_ERROR;
+       normstr = calloc(normstrlen + 1, sizeof(UChar));
+       if (!normstr)
+               goto out_unistr;
+       unorm2_normalize(uc->normalizer, unistr, unistrlen, normstr, normstrlen,
+                       &uerr);
+       if (U_FAILURE(uerr))
                goto out_normstr;
 
        entry->normstr = normstr;
        entry->normstrlen = normstrlen;
+       free(unistr);
        return true;
+
 out_normstr:
        free(normstr);
+out_unistr:
+       free(unistr);
        return false;
 }
 
@@ -214,8 +241,8 @@ name_entry_hash(
        size_t                  namelen;
        xfs_dahash_t            hash;
 
-       name = entry->normstr;
-       namelen = entry->normstrlen;
+       name = (uint8_t *)entry->normstr;
+       namelen = entry->normstrlen * sizeof(UChar);
 
        /*
         * Do four characters at a time as long as we can.
@@ -249,6 +276,7 @@ unicrash_init(
        size_t                  nr_buckets)
 {
        struct unicrash         *p;
+       UErrorCode              uerr = U_ZERO_ERROR;
 
        if (!is_utf8_locale()) {
                *ucp = NULL;
@@ -266,9 +294,15 @@ unicrash_init(
        p->ctx = ctx;
        p->nr_buckets = nr_buckets;
        p->compare_ino = compare_ino;
+       p->normalizer = unorm2_getNFKCInstance(&uerr);
+       if (U_FAILURE(uerr))
+               goto out_free;
        *ucp = p;
 
        return true;
+out_free:
+       free(p);
+       return false;
 }
 
 /* Initialize the collision detector for a directory. */
@@ -378,7 +412,7 @@ unicrash_add(
        while (entry != NULL) {
                /* Same normalization? */
                if (new_entry->normstrlen == entry->normstrlen &&
-                   !u8_strcmp(new_entry->normstr, entry->normstr) &&
+                   !u_strcmp(new_entry->normstr, entry->normstr) &&
                    (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
                        *badflags |= UNICRASH_NOT_UNIQUE;
                        *existing_entry = entry;
index 33373190ac8d1f7ca0509a020fffa70ad51f60b4..67e70aed3ba58c062d2366264a2a23cbcaf8d405 100644 (file)
@@ -23,7 +23,7 @@
 struct unicrash;
 
 /* Unicode name collision detection. */
-#ifdef HAVE_U8NORMALIZE
+#ifdef HAVE_LIBICU
 
 struct dirent;
 
@@ -42,6 +42,6 @@ bool unicrash_check_xattr_name(struct unicrash *uc, const char *descr,
 # define unicrash_free(u)                      do {(u) = (u);} while (0)
 # define unicrash_check_dir_name(u, d, n)      (true)
 # define unicrash_check_xattr_name(u, d, n)    (true)
-#endif /* HAVE_U8NORMALIZE */
+#endif /* HAVE_LIBICU */
 
 #endif /* XFS_SCRUB_UNICRASH_H_ */