]> git.ipfire.org Git - thirdparty/e2fsprogs.git/blame - lib/ext2fs/nls_utf8.c
ext2fs: nls: support UTF-8 11.0 with NFKD normalization
[thirdparty/e2fsprogs.git] / lib / ext2fs / nls_utf8.c
CommitLineData
c2f9875c
GKB
/*
 * Copyright (c) 2018 Collabora Ltd.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
15
/*
 * This code is adapted from the Linux kernel. A userspace version is
 * kept here so that the hashes it produces match the kernel
 * implementation.
 */
21
22#include "nls.h"
23#include "utf8n.h"
24
25#include <limits.h>
26#include <errno.h>
27
28static int utf8_casefold(const struct nls_table *table,
29 const unsigned char *str, size_t len,
30 unsigned char *dest, size_t dlen)
31{
32 const struct utf8data *data = utf8nfkdicf(table->version);
33 struct utf8cursor cur;
34 size_t nlen = 0;
35
36 if (utf8ncursor(&cur, data, str, len) < 0)
37 goto invalid_seq;
38
39 for (nlen = 0; nlen < dlen; nlen++) {
40 dest[nlen] = utf8byte(&cur);
41 if (!dest[nlen])
42 return nlen;
43 if (dest[nlen] == -1)
44 break;
45 }
46
47 return -ENAMETOOLONG;
48
49invalid_seq:
50 if (dlen < len)
51 return -ENAMETOOLONG;
52
53 /* Signal invalid sequence */
54 return -EINVAL;
55}
56
57static int utf8_normalize(const struct nls_table *table,
58 const unsigned char *str, size_t len,
59 unsigned char *dest, size_t dlen)
60{
61 const struct utf8data *data = utf8nfkdi(table->version);
62 struct utf8cursor cur;
63 ssize_t nlen = 0;
64
65 if (utf8ncursor(&cur, data, str, len) < 0)
66 goto invalid_seq;
67
68 for (nlen = 0; nlen < dlen; nlen++) {
69 dest[nlen] = utf8byte(&cur);
70 if (!dest[nlen])
71 return nlen;
72 if (dest[nlen] == -1)
73 break;
74 }
75
76 return -ENAMETOOLONG;
77
78invalid_seq:
79 if (dlen < len)
80 return -ENAMETOOLONG;
81
82 /* Signal invalid sequence */
83 return -EINVAL;
84}
85
86const static struct nls_ops utf8_ops = {
87 .casefold = utf8_casefold,
88 .normalize = utf8_normalize,
89
90};
91
92const struct nls_table nls_utf8_11_0 = {
93 .ops = &utf8_ops,
94 .version = UNICODE_AGE(11, 0, 0),
95};