xfs_scrub: transition from libunistring to libicu for Unicode processing

author Darrick J. Wong <darrick.wong@oracle.com>

Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)

committer Eric Sandeen <sandeen@redhat.com>

Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
author Darrick J. Wong <darrick.wong@oracle.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
committer Eric Sandeen <sandeen@redhat.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
diff --git a/configure.ac b/configure.ac

index 686bf789bdffc1ad1646b5a65b184820f2147fb1..1885c4569d02ce997a4993678aed1a86aa3106f2 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -95,6 +95,11 @@ AC_ARG_ENABLE(lto,
         enable_lto=probe)
  AC_SUBST(enable_lto)
  
+# Enable libicu so that xfs_scrub can scan names for malicious Unicode sequences
+AC_ARG_ENABLE(libicu,
+[ --enable-libicu=[yes/no]   Enable Unicode name scanning (libicu) [default=probe]],,
+       enable_libicu=probe)
+
  #
  # If the user specified a libdir ending in lib64 do not append another
  # 64 to the library names.
@@ -173,8 +178,12 @@ AC_HAVE_DEVMAPPER
  AC_HAVE_MALLINFO
  AC_PACKAGE_WANT_ATTRIBUTES_H
  AC_HAVE_LIBATTR
-AC_PACKAGE_WANT_UNINORM_H
-AC_HAVE_U8NORMALIZE
+if test "$enable_libicu" = "yes" || test "$enable_libicu" = "probe"; then
+       AC_HAVE_LIBICU
+fi
+if test "$enable_libicu" = "yes" && test "$have_libicu" != "yes"; then
+        AC_MSG_ERROR([libicu not found.])
+fi
  AC_HAVE_OPENAT
  AC_HAVE_FSTATAT
  AC_HAVE_SG_IO
diff --git a/debian/control b/debian/control

index 2937c99198e919df01aa7dc80993b0b9d4209582..f4f807b0d043744005237c9d7e559ff661fb38ca 100644 (file)
--- a/debian/control
+++ b/debian/control
@@ -3,7 +3,7 @@ Section: admin
  Priority: optional
  Maintainer: XFS Development Team <linux-xfs@vger.kernel.org>
  Uploaders: Nathan Scott <nathans@debian.org>, Anibal Monsalve Salazar <anibal@debian.org>
-Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libunistring-dev, dh-python, pkg-config
+Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libicu-dev, dh-python, pkg-config
  Standards-Version: 4.0.0
  Homepage: https://xfs.wiki.kernel.org/
  
diff --git a/include/builddefs.in b/include/builddefs.in

index 7a2a6268671789a3b162cfabbd565c782cd554c4..8aac06cf90dc7c966e876a57f836ccc47a2a6859 100644 (file)
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -36,7 +36,6 @@ LIBEDITLINE = @libeditline@
  LIBREADLINE = @libreadline@
  LIBBLKID = @libblkid@
  LIBDEVMAPPER = @libdevmapper@
-LIBUNISTRING = @libunistring@
  LIBXFS = $(TOPDIR)/libxfs/libxfs.la
  LIBFROG = $(TOPDIR)/libfrog/libfrog.la
  LIBXCMD = $(TOPDIR)/libxcmd/libxcmd.la
@@ -122,7 +121,7 @@ HAVE_MAP_SYNC = @have_map_sync@
  HAVE_DEVMAPPER = @have_devmapper@
  HAVE_MALLINFO = @have_mallinfo@
  HAVE_LIBATTR = @have_libattr@
-HAVE_U8NORMALIZE = @have_u8normalize@
+HAVE_LIBICU = @have_libicu@
  HAVE_OPENAT = @have_openat@
  HAVE_FSTATAT = @have_fstatat@
  HAVE_SG_IO = @have_sg_io@
@@ -173,6 +172,9 @@ ifeq ($(HAVE_GETFSMAP),yes)
  PCFLAGS+= -DHAVE_GETFSMAP
  endif
  
+LIBICU_LIBS = @libicu_LIBS@
+LIBICU_CFLAGS = @libicu_CFLAGS@
+
  SANITIZER_CFLAGS += @addrsan_cflags@ @threadsan_cflags@ @ubsan_cflags@
  SANITIZER_LDFLAGS += @addrsan_ldflags@ @threadsan_ldflags@ @ubsan_ldflags@
  
diff --git a/m4/Makefile b/m4/Makefile

index a6d11e9704ff03992d8b56801f092059e4569a72..cf0ce60bd39f74b34ad570363cbd3db1c8cbd615 100644 (file)
--- a/m4/Makefile
+++ b/m4/Makefile
@@ -23,7 +23,7 @@ LSRCFILES = \
         package_sanitizer.m4 \
         package_services.m4 \
         package_types.m4 \
-       package_unistring.m4 \
+       package_icu.m4 \
         package_utilies.m4 \
         package_uuiddev.m4 \
         multilib.m4 \
diff --git a/m4/package_icu.m4 b/m4/package_icu.m4

new file mode 100644 (file)

index 0000000..3ccbe0c
--- /dev/null
+++ b/m4/package_icu.m4
@@ -0,0 +1,6 @@
+AC_DEFUN([AC_HAVE_LIBICU],
+  [ PKG_CHECK_MODULES([libicu], [icu-i18n], [have_libicu=yes], [have_libicu=no])
+    AC_SUBST(have_libicu)
+    AC_SUBST(libicu_CFLAGS)
+    AC_SUBST(libicu_LIBS)
+  ])
diff --git a/m4/package_unistring.m4 b/m4/package_unistring.m4

deleted file mode 100644 (file)

index 9cbfcb0..0000000
--- a/m4/package_unistring.m4
+++ /dev/null
@@ -1,19 +0,0 @@
-AC_DEFUN([AC_PACKAGE_WANT_UNINORM_H],
-  [ AC_CHECK_HEADERS(uninorm.h)
-    if test $ac_cv_header_uninorm_h = no; then
-       AC_CHECK_HEADERS(uninorm.h,, [
-       echo
-       echo 'WARNING: could not find a valid uninorm.h header.'])
-    fi
-  ])
-
-AC_DEFUN([AC_HAVE_U8NORMALIZE],
-  [ AC_CHECK_LIB(unistring, u8_normalize,[
-       libunistring=-lunistring
-       have_u8normalize=yes
-    ],[
-       echo
-       echo 'WARNING: xfs_scrub will not be built with Unicode libraries.'])
-    AC_SUBST(libunistring)
-    AC_SUBST(have_u8normalize)
-  ])
diff --git a/scrub/Makefile b/scrub/Makefile

index 063279400262becf1db5acfd1eef62972a625154..bcc05a0ed994ef04cbe34ebde368403fc4a070d4 100644 (file)
--- a/scrub/Makefile
+++ b/scrub/Makefile
@@ -68,8 +68,8 @@ spacemap.c \
  vfs.c \
  xfs_scrub.c
  
-LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBUNISTRING) $(LIBRT)
-LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG) $(LIBUNISTRING) $(LIBRT)
+LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBICU_LIBS) $(LIBRT)
+LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG)
  LLDFLAGS = -static
  
  ifeq ($(HAVE_MALLINFO),yes)
@@ -84,9 +84,9 @@ ifeq ($(HAVE_LIBATTR),yes)
  LCFLAGS += -DHAVE_LIBATTR
  endif
  
-ifeq ($(HAVE_U8NORMALIZE),yes)
+ifeq ($(HAVE_LIBICU),yes)
  CFILES += unicrash.c
-LCFLAGS += -DHAVE_U8NORMALIZE
+LCFLAGS += -DHAVE_LIBICU $(LIBICU_CFLAGS)
  endif
  
  ifeq ($(HAVE_SG_IO),yes)
diff --git a/scrub/unicrash.c b/scrub/unicrash.c

index 51da32c2eb4f3245dc0c4e52c18426e89d880c82..06ccadf0ffa7ebab158fe19c9cc1cccfdc89e935 100644 (file)
--- a/scrub/unicrash.c
+++ b/scrub/unicrash.c
@@ -23,8 +23,9 @@
  #include <dirent.h>
  #include <sys/types.h>
  #include <sys/statvfs.h>
-#include <unistr.h>
-#include <uninorm.h>
+#include <strings.h>
+#include <unicode/ustring.h>
+#include <unicode/unorm2.h>
  #include "path.h"
  #include "xfs_scrub.h"
  #include "common.h"
@@ -63,7 +64,7 @@ struct name_entry {
         struct name_entry       *next;
  
         /* NFKC normalized name */
-       uint8_t                 *normstr;
+       UChar                   *normstr;
         size_t                  normstrlen;
  
         xfs_ino_t               ino;
@@ -77,6 +78,7 @@ struct name_entry {
  
  struct unicrash {
         struct scrub_ctx        *ctx;
+       const UNormalizer2      *normalizer;
         bool                    compare_ino;
         size_t                  nr_buckets;
         struct name_entry       *buckets[0];
@@ -135,23 +137,48 @@ name_entry_compute_checknames(
         struct unicrash         *uc,
         struct name_entry       *entry)
  {
-       uint8_t                 *normstr;
-       size_t                  normstrlen;
-
-       normstrlen = (entry->namelen * 2) + 1;
-       normstr = calloc(normstrlen, sizeof(uint8_t));
-       if (!normstr)
+       UChar                   *normstr;
+       UChar                   *unistr;
+       int32_t                 normstrlen;
+       int32_t                 unistrlen;
+       UErrorCode              uerr = U_ZERO_ERROR;
+
+       /* Convert bytestr to unistr for normalization */
+       u_strFromUTF8(NULL, 0, &unistrlen, entry->name, entry->namelen, &uerr);
+       if (uerr != U_BUFFER_OVERFLOW_ERROR)
                 return false;
-
-       if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
-                       entry->namelen, normstr, &normstrlen));
+       uerr = U_ZERO_ERROR;
+       unistr = calloc(unistrlen + 1, sizeof(UChar));
+       if (!unistr)
+               return false;
+       u_strFromUTF8(unistr, unistrlen, NULL, entry->name, entry->namelen,
+                       &uerr);
+       if (U_FAILURE(uerr))
+               goto out_unistr;
+
+       /* Normalize the string. */
+       normstrlen = unorm2_normalize(uc->normalizer, unistr, unistrlen, NULL,
+                       0, &uerr);
+       if (uerr != U_BUFFER_OVERFLOW_ERROR)
+               goto out_unistr;
+       uerr = U_ZERO_ERROR;
+       normstr = calloc(normstrlen + 1, sizeof(UChar));
+       if (!normstr)
+               goto out_unistr;
+       unorm2_normalize(uc->normalizer, unistr, unistrlen, normstr, normstrlen,
+                       &uerr);
+       if (U_FAILURE(uerr))
                 goto out_normstr;
  
         entry->normstr = normstr;
         entry->normstrlen = normstrlen;
+       free(unistr);
         return true;
+
  out_normstr:
         free(normstr);
+out_unistr:
+       free(unistr);
         return false;
  }
  
@@ -214,8 +241,8 @@ name_entry_hash(
         size_t                  namelen;
         xfs_dahash_t            hash;
  
-       name = entry->normstr;
-       namelen = entry->normstrlen;
+       name = (uint8_t *)entry->normstr;
+       namelen = entry->normstrlen * sizeof(UChar);
  
         /*
          * Do four characters at a time as long as we can.
@@ -249,6 +276,7 @@ unicrash_init(
         size_t                  nr_buckets)
  {
         struct unicrash         *p;
+       UErrorCode              uerr = U_ZERO_ERROR;
  
         if (!is_utf8_locale()) {
                 *ucp = NULL;
@@ -266,9 +294,15 @@ unicrash_init(
         p->ctx = ctx;
         p->nr_buckets = nr_buckets;
         p->compare_ino = compare_ino;
+       p->normalizer = unorm2_getNFKCInstance(&uerr);
+       if (U_FAILURE(uerr))
+               goto out_free;
         *ucp = p;
  
         return true;
+out_free:
+       free(p);
+       return false;
  }
  
  /* Initialize the collision detector for a directory. */
@@ -378,7 +412,7 @@ unicrash_add(
         while (entry != NULL) {
                 /* Same normalization? */
                 if (new_entry->normstrlen == entry->normstrlen &&
-                   !u8_strcmp(new_entry->normstr, entry->normstr) &&
+                   !u_strcmp(new_entry->normstr, entry->normstr) &&
                     (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
                         *badflags |= UNICRASH_NOT_UNIQUE;
                         *existing_entry = entry;
diff --git a/scrub/unicrash.h b/scrub/unicrash.h

index 33373190ac8d1f7ca0509a020fffa70ad51f60b4..67e70aed3ba58c062d2366264a2a23cbcaf8d405 100644 (file)
--- a/scrub/unicrash.h
+++ b/scrub/unicrash.h
@@ -23,7 +23,7 @@
  struct unicrash;
  
  /* Unicode name collision detection. */
-#ifdef HAVE_U8NORMALIZE
+#ifdef HAVE_LIBICU
  
  struct dirent;
  
@@ -42,6 +42,6 @@ bool unicrash_check_xattr_name(struct unicrash *uc, const char *descr,
  # define unicrash_free(u)                      do {(u) = (u);} while (0)
  # define unicrash_check_dir_name(u, d, n)      (true)
  # define unicrash_check_xattr_name(u, d, n)    (true)
-#endif /* HAVE_U8NORMALIZE */
+#endif /* HAVE_LIBICU */
  
  #endif /* XFS_SCRUB_UNICRASH_H_ */
author	Darrick J. Wong <darrick.wong@oracle.com>
	Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
committer	Eric Sandeen <sandeen@redhat.com>
	Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
configure.ac		patch \| blob \| blame \| history
debian/control		patch \| blob \| blame \| history
include/builddefs.in		patch \| blob \| blame \| history
m4/Makefile		patch \| blob \| blame \| history
m4/package_icu.m4	[new file with mode: 0644]	patch \| blob
m4/package_unistring.m4	[deleted file]	patch \| blob \| blame \| history
scrub/Makefile		patch \| blob \| blame \| history
scrub/unicrash.c		patch \| blob \| blame \| history
scrub/unicrash.h		patch \| blob \| blame \| history