Switch from libunistring to libicu for Unicode name scanning.
This will make it easy to warn about Unicode code points that do not
belong in identifiers (directional overrides, zero-width elements) and
to warn about names that could render similarly enough to cause confusion.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Acked-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
enable_lto=probe)
AC_SUBST(enable_lto)
+# Enable libicu for xfs_scrubbing of malicious unicode sequences in names
+AC_ARG_ENABLE(libicu,
+[ --enable-libicu=[yes/no] Enable Unicode name scanning (libicu) [default=probe]],,
+ enable_libicu=probe)
+
#
# If the user specified a libdir ending in lib64 do not append another
# 64 to the library names.
AC_HAVE_MALLINFO
AC_PACKAGE_WANT_ATTRIBUTES_H
AC_HAVE_LIBATTR
-AC_PACKAGE_WANT_UNINORM_H
-AC_HAVE_U8NORMALIZE
+if test "$enable_libicu" = "yes" || test "$enable_libicu" = "probe"; then
+ AC_HAVE_LIBICU
+fi
+if test "$enable_libicu" = "yes" && test "$have_libicu" != "yes"; then
+ AC_MSG_ERROR([libicu not found.])
+fi
AC_HAVE_OPENAT
AC_HAVE_FSTATAT
AC_HAVE_SG_IO
Priority: optional
Maintainer: XFS Development Team <linux-xfs@vger.kernel.org>
Uploaders: Nathan Scott <nathans@debian.org>, Anibal Monsalve Salazar <anibal@debian.org>
-Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libunistring-dev, dh-python, pkg-config
+Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libicu-dev, dh-python, pkg-config
Standards-Version: 4.0.0
Homepage: https://xfs.wiki.kernel.org/
LIBREADLINE = @libreadline@
LIBBLKID = @libblkid@
LIBDEVMAPPER = @libdevmapper@
-LIBUNISTRING = @libunistring@
LIBXFS = $(TOPDIR)/libxfs/libxfs.la
LIBFROG = $(TOPDIR)/libfrog/libfrog.la
LIBXCMD = $(TOPDIR)/libxcmd/libxcmd.la
HAVE_DEVMAPPER = @have_devmapper@
HAVE_MALLINFO = @have_mallinfo@
HAVE_LIBATTR = @have_libattr@
-HAVE_U8NORMALIZE = @have_u8normalize@
+HAVE_LIBICU = @have_libicu@
HAVE_OPENAT = @have_openat@
HAVE_FSTATAT = @have_fstatat@
HAVE_SG_IO = @have_sg_io@
PCFLAGS+= -DHAVE_GETFSMAP
endif
+LIBICU_LIBS = @libicu_LIBS@
+LIBICU_CFLAGS = @libicu_CFLAGS@
+
SANITIZER_CFLAGS += @addrsan_cflags@ @threadsan_cflags@ @ubsan_cflags@
SANITIZER_LDFLAGS += @addrsan_ldflags@ @threadsan_ldflags@ @ubsan_ldflags@
package_sanitizer.m4 \
package_services.m4 \
package_types.m4 \
- package_unistring.m4 \
+ package_icu.m4 \
package_utilies.m4 \
package_uuiddev.m4 \
multilib.m4 \
--- /dev/null
+AC_DEFUN([AC_HAVE_LIBICU],
+ [ PKG_CHECK_MODULES([libicu], [icu-i18n], [have_libicu=yes], [have_libicu=no])
+ AC_SUBST(have_libicu)
+ AC_SUBST(libicu_CFLAGS)
+ AC_SUBST(libicu_LIBS)
+ ])
+++ /dev/null
-AC_DEFUN([AC_PACKAGE_WANT_UNINORM_H],
- [ AC_CHECK_HEADERS(uninorm.h)
- if test $ac_cv_header_uninorm_h = no; then
- AC_CHECK_HEADERS(uninorm.h,, [
- echo
- echo 'WARNING: could not find a valid uninorm.h header.'])
- fi
- ])
-
-AC_DEFUN([AC_HAVE_U8NORMALIZE],
- [ AC_CHECK_LIB(unistring, u8_normalize,[
- libunistring=-lunistring
- have_u8normalize=yes
- ],[
- echo
- echo 'WARNING: xfs_scrub will not be built with Unicode libraries.'])
- AC_SUBST(libunistring)
- AC_SUBST(have_u8normalize)
- ])
vfs.c \
xfs_scrub.c
-LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBUNISTRING) $(LIBRT)
-LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG) $(LIBUNISTRING) $(LIBRT)
+LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBICU_LIBS) $(LIBRT)
+LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG)
LLDFLAGS = -static
ifeq ($(HAVE_MALLINFO),yes)
LCFLAGS += -DHAVE_LIBATTR
endif
-ifeq ($(HAVE_U8NORMALIZE),yes)
+ifeq ($(HAVE_LIBICU),yes)
CFILES += unicrash.c
-LCFLAGS += -DHAVE_U8NORMALIZE
+LCFLAGS += -DHAVE_LIBICU $(LIBICU_CFLAGS)
endif
ifeq ($(HAVE_SG_IO),yes)
#include <dirent.h>
#include <sys/types.h>
#include <sys/statvfs.h>
-#include <unistr.h>
-#include <uninorm.h>
+#include <strings.h>
+#include <unicode/ustring.h>
+#include <unicode/unorm2.h>
#include "path.h"
#include "xfs_scrub.h"
#include "common.h"
struct name_entry *next;
/* NFKC normalized name */
- uint8_t *normstr;
+ UChar *normstr;
size_t normstrlen;
xfs_ino_t ino;
struct unicrash {
struct scrub_ctx *ctx;
+ const UNormalizer2 *normalizer;
bool compare_ino;
size_t nr_buckets;
struct name_entry *buckets[0];
struct unicrash *uc,
struct name_entry *entry)
{
- uint8_t *normstr;
- size_t normstrlen;
-
- normstrlen = (entry->namelen * 2) + 1;
- normstr = calloc(normstrlen, sizeof(uint8_t));
- if (!normstr)
+ UChar *normstr;
+ UChar *unistr;
+ int32_t normstrlen;
+ int32_t unistrlen;
+ UErrorCode uerr = U_ZERO_ERROR;
+
+ /* Convert bytestr to unistr for normalization */
+ u_strFromUTF8(NULL, 0, &unistrlen, entry->name, entry->namelen, &uerr);
+ if (uerr != U_BUFFER_OVERFLOW_ERROR)
return false;
-
- if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
- entry->namelen, normstr, &normstrlen));
+ uerr = U_ZERO_ERROR;
+ unistr = calloc(unistrlen + 1, sizeof(UChar));
+ if (!unistr)
+ return false;
+ u_strFromUTF8(unistr, unistrlen, NULL, entry->name, entry->namelen,
+ &uerr);
+ if (U_FAILURE(uerr))
+ goto out_unistr;
+
+ /* Normalize the string. */
+ normstrlen = unorm2_normalize(uc->normalizer, unistr, unistrlen, NULL,
+ 0, &uerr);
+ if (uerr != U_BUFFER_OVERFLOW_ERROR)
+ goto out_unistr;
+ uerr = U_ZERO_ERROR;
+ normstr = calloc(normstrlen + 1, sizeof(UChar));
+ if (!normstr)
+ goto out_unistr;
+ unorm2_normalize(uc->normalizer, unistr, unistrlen, normstr, normstrlen,
+ &uerr);
+ if (U_FAILURE(uerr))
goto out_normstr;
entry->normstr = normstr;
entry->normstrlen = normstrlen;
+ free(unistr);
return true;
+
out_normstr:
free(normstr);
+out_unistr:
+ free(unistr);
return false;
}
size_t namelen;
xfs_dahash_t hash;
- name = entry->normstr;
- namelen = entry->normstrlen;
+ name = (uint8_t *)entry->normstr;
+ namelen = entry->normstrlen * sizeof(UChar);
/*
* Do four characters at a time as long as we can.
size_t nr_buckets)
{
struct unicrash *p;
+ UErrorCode uerr = U_ZERO_ERROR;
if (!is_utf8_locale()) {
*ucp = NULL;
p->ctx = ctx;
p->nr_buckets = nr_buckets;
p->compare_ino = compare_ino;
+ p->normalizer = unorm2_getNFKCInstance(&uerr);
+ if (U_FAILURE(uerr))
+ goto out_free;
*ucp = p;
return true;
+out_free:
+ free(p);
+ return false;
}
/* Initialize the collision detector for a directory. */
while (entry != NULL) {
/* Same normalization? */
if (new_entry->normstrlen == entry->normstrlen &&
- !u8_strcmp(new_entry->normstr, entry->normstr) &&
+ !u_strcmp(new_entry->normstr, entry->normstr) &&
(uc->compare_ino ? entry->ino != new_entry->ino : true)) {
*badflags |= UNICRASH_NOT_UNIQUE;
*existing_entry = entry;
struct unicrash;
/* Unicode name collision detection. */
-#ifdef HAVE_U8NORMALIZE
+#ifdef HAVE_LIBICU
struct dirent;
# define unicrash_free(u) do {(u) = (u);} while (0)
# define unicrash_check_dir_name(u, d, n) (true)
# define unicrash_check_xattr_name(u, d, n) (true)
-#endif /* HAVE_U8NORMALIZE */
+#endif /* HAVE_LIBICU */
#endif /* XFS_SCRUB_UNICRASH_H_ */