]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
955bd501 | 2 | |
11c3a366 | 3 | #include <errno.h> |
955bd501 KS |
4 | #include <stdlib.h> |
5 | #include <string.h> | |
6 | ||
b5efdb8a | 7 | #include "alloc-util.h" |
955bd501 | 8 | #include "strbuf.h" |
d6c5d19b | 9 | #include "util.h" |
955bd501 | 10 | |
4693cfb3 | 11 | /* |
ab06eef8 | 12 | * Strbuf stores given strings in a single continuous allocated memory |
f1c0ece1 KS |
13 | * area. Identical strings are de-duplicated and return the same offset |
14 | * as the first string stored. If the tail of a string already exists | |
15 | * in the buffer, the tail is returned. | |
4693cfb3 | 16 | * |
f1c0ece1 KS |
17 | * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the |
18 | * information about the stored strings. | |
4693cfb3 KS |
19 | * |
20 | * Example of udev rules: | |
21 | * $ ./udevadm test . | |
22 | * ... | |
23 | * read rules file: /usr/lib/udev/rules.d/99-systemd.rules | |
24 | * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings | |
25 | * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used | |
26 | * ... | |
27 | */ | |
28 | ||
955bd501 KS |
29 | struct strbuf *strbuf_new(void) { |
30 | struct strbuf *str; | |
31 | ||
2fb076ad | 32 | str = new(struct strbuf, 1); |
955bd501 KS |
33 | if (!str) |
34 | return NULL; | |
2fb076ad ZJS |
35 | *str = (struct strbuf) { |
36 | .buf = new0(char, 1), | |
37 | .root = new0(struct strbuf_node, 1), | |
38 | .len = 1, | |
39 | .nodes_count = 1, | |
40 | }; | |
41 | if (!str->buf || !str->root) { | |
42 | free(str->buf); | |
43 | free(str->root); | |
44 | return mfree(str); | |
45 | } | |
955bd501 | 46 | |
955bd501 | 47 | return str; |
955bd501 KS |
48 | } |
49 | ||
2fb076ad | 50 | static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) { |
955bd501 KS |
51 | size_t i; |
52 | ||
53 | for (i = 0; i < node->children_count; i++) | |
54 | strbuf_node_cleanup(node->children[i].child); | |
55 | free(node->children); | |
2fb076ad | 56 | return mfree(node); |
955bd501 KS |
57 | } |
58 | ||
4693cfb3 | 59 | /* clean up trie data, leave only the string buffer */ |
955bd501 KS |
60 | void strbuf_complete(struct strbuf *str) { |
61 | if (!str) | |
62 | return; | |
63 | if (str->root) | |
2fb076ad | 64 | str->root = strbuf_node_cleanup(str->root); |
955bd501 KS |
65 | } |
66 | ||
4693cfb3 | 67 | /* clean up everything */ |
955bd501 | 68 | void strbuf_cleanup(struct strbuf *str) { |
2fb076ad | 69 | strbuf_complete(str); |
955bd501 KS |
70 | free(str->buf); |
71 | free(str); | |
72 | } | |
73 | ||
3c8bed4e ZJS |
74 | static int strbuf_children_cmp(const struct strbuf_child_entry *n1, |
75 | const struct strbuf_child_entry *n2) { | |
955bd501 KS |
76 | return n1->c - n2->c; |
77 | } | |
78 | ||
3c8bed4e ZJS |
79 | static void bubbleinsert(struct strbuf_node *node, |
80 | uint8_t c, | |
81 | struct strbuf_node *node_child) { | |
82 | ||
83 | struct strbuf_child_entry new = { | |
84 | .c = c, | |
85 | .child = node_child, | |
86 | }; | |
87 | int left = 0, right = node->children_count; | |
88 | ||
89 | while (right > left) { | |
90 | int middle = (right + left) / 2 ; | |
91 | if (strbuf_children_cmp(&node->children[middle], &new) <= 0) | |
92 | left = middle + 1; | |
93 | else | |
94 | right = middle; | |
95 | } | |
96 | ||
97 | memmove(node->children + left + 1, node->children + left, | |
98 | sizeof(struct strbuf_child_entry) * (node->children_count - left)); | |
99 | node->children[left] = new; | |
100 | ||
313cefa1 | 101 | node->children_count++; |
3c8bed4e ZJS |
102 | } |
103 | ||
4693cfb3 | 104 | /* add string, return the index/offset into the buffer */ |
955bd501 KS |
105 | ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { |
106 | uint8_t c; | |
107 | struct strbuf_node *node; | |
108 | size_t depth; | |
109 | char *buf_new; | |
110 | struct strbuf_child_entry *child; | |
111 | struct strbuf_node *node_child; | |
112 | ssize_t off; | |
113 | ||
114 | if (!str->root) | |
115 | return -EINVAL; | |
116 | ||
117 | /* search string; start from last character to find possibly matching tails */ | |
435ce146 | 118 | |
955bd501 | 119 | str->in_count++; |
435ce146 ZJS |
120 | if (len == 0) { |
121 | str->dedup_count++; | |
122 | return 0; | |
123 | } | |
955bd501 KS |
124 | str->in_len += len; |
125 | ||
126 | node = str->root; | |
955bd501 KS |
127 | for (depth = 0; depth <= len; depth++) { |
128 | struct strbuf_child_entry search; | |
129 | ||
130 | /* match against current node */ | |
131 | off = node->value_off + node->value_len - len; | |
132 | if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) { | |
133 | str->dedup_len += len; | |
134 | str->dedup_count++; | |
135 | return off; | |
136 | } | |
137 | ||
c45606eb LR |
138 | c = s[len - 1 - depth]; |
139 | ||
955bd501 | 140 | /* lookup child node */ |
955bd501 | 141 | search.c = c; |
d6c5d19b ZJS |
142 | child = bsearch_safe(&search, node->children, node->children_count, |
143 | sizeof(struct strbuf_child_entry), | |
144 | (__compar_fn_t) strbuf_children_cmp); | |
955bd501 KS |
145 | if (!child) |
146 | break; | |
147 | node = child->child; | |
148 | } | |
149 | ||
150 | /* add new string */ | |
151 | buf_new = realloc(str->buf, str->len + len+1); | |
152 | if (!buf_new) | |
153 | return -ENOMEM; | |
154 | str->buf = buf_new; | |
155 | off = str->len; | |
156 | memcpy(str->buf + off, s, len); | |
157 | str->len += len; | |
158 | str->buf[str->len++] = '\0'; | |
159 | ||
160 | /* new node */ | |
2fb076ad | 161 | node_child = new(struct strbuf_node, 1); |
955bd501 KS |
162 | if (!node_child) |
163 | return -ENOMEM; | |
2fb076ad ZJS |
164 | *node_child = (struct strbuf_node) { |
165 | .value_off = off, | |
166 | .value_len = len, | |
167 | }; | |
955bd501 KS |
168 | |
169 | /* extend array, add new entry, sort for bisection */ | |
62d74c78 | 170 | child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry)); |
a9c307e5 ZJS |
171 | if (!child) { |
172 | free(node_child); | |
955bd501 | 173 | return -ENOMEM; |
a9c307e5 ZJS |
174 | } |
175 | ||
176 | str->nodes_count++; | |
177 | ||
955bd501 | 178 | node->children = child; |
3c8bed4e | 179 | bubbleinsert(node, c, node_child); |
955bd501 KS |
180 | |
181 | return off; | |
182 | } |