]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - releases/6.6.26/modpost-optimize-symbol-search-from-linear-to-binary.patch
Linux 6.6.26
[thirdparty/kernel/stable-queue.git] / releases / 6.6.26 / modpost-optimize-symbol-search-from-linear-to-binary.patch
CommitLineData
ffc1c2fe
SL
1From d47bddab5a7df4aa75a6a464c4fe4bbeb16779b4 Mon Sep 17 00:00:00 2001
2From: Sasha Levin <sashal@kernel.org>
3Date: Tue, 26 Sep 2023 08:40:44 -0400
4Subject: modpost: Optimize symbol search from linear to binary search
5
6From: Jack Brennen <jbrennen@google.com>
7
8[ Upstream commit 4074532758c5c367d3fcb8d124150824a254659d ]
9
10Modify modpost to use binary search for converting addresses back
11into symbol references. Previously it used linear search.
12
13This change saves a few seconds of wall time for defconfig builds,
14but can save several minutes on allyesconfigs.
15
16Before:
17$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
18$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
19198.38user 1.27system 3:19.71elapsed
20
21After:
22$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
23$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
2411.91user 0.85system 0:12.78elapsed
25
26Signed-off-by: Jack Brennen <jbrennen@google.com>
27Tested-by: Nick Desaulniers <ndesaulniers@google.com>
28Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
29Stable-dep-of: 1102f9f85bf6 ("modpost: do not make find_tosym() return NULL")
30Signed-off-by: Sasha Levin <sashal@kernel.org>
31---
32 scripts/mod/Makefile | 4 +-
33 scripts/mod/modpost.c | 70 ++------------
34 scripts/mod/modpost.h | 25 +++++
35 scripts/mod/symsearch.c | 199 ++++++++++++++++++++++++++++++++++++++++
36 4 files changed, 232 insertions(+), 66 deletions(-)
37 create mode 100644 scripts/mod/symsearch.c
38
39diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
40index c9e38ad937fd4..3c54125eb3733 100644
41--- a/scripts/mod/Makefile
42+++ b/scripts/mod/Makefile
43@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
44 hostprogs-always-y += modpost mk_elfconfig
45 always-y += empty.o
46
47-modpost-objs := modpost.o file2alias.o sumversion.o
48+modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o
49
50 devicetable-offsets-file := devicetable-offsets.h
51
52@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
53
54 # dependencies on generated files need to be listed explicitly
55
56-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
57+$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
58 $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
59
60 quiet_cmd_elfconfig = MKELF $@
61diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
62index 5191fdbd3fa23..66589fb4e9aef 100644
63--- a/scripts/mod/modpost.c
64+++ b/scripts/mod/modpost.c
65@@ -22,7 +22,6 @@
66 #include <errno.h>
67 #include "modpost.h"
68 #include "../../include/linux/license.h"
69-#include "../../include/linux/module_symbol.h"
70
71 static bool module_enabled;
72 /* Are we using CONFIG_MODVERSIONS? */
73@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
74 *p = TO_NATIVE(*p);
75 }
76
77+ symsearch_init(info);
78+
79 return 1;
80 }
81
82 static void parse_elf_finish(struct elf_info *info)
83 {
84+ symsearch_finish(info);
85 release_file(info->hdr, info->size);
86 }
87
88@@ -1042,71 +1044,10 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
89 return 1;
90 }
91
92-/*
93- * If there's no name there, ignore it; likewise, ignore it if it's
94- * one of the magic symbols emitted used by current tools.
95- *
96- * Otherwise if find_symbols_between() returns those symbols, they'll
97- * fail the whitelist tests and cause lots of false alarms ... fixable
98- * only by merging __exit and __init sections into __text, bloating
99- * the kernel (which is especially evil on embedded platforms).
100- */
101-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
102-{
103- const char *name = elf->strtab + sym->st_name;
104-
105- if (!name || !strlen(name))
106- return 0;
107- return !is_mapping_symbol(name);
108-}
109-
110-/* Look up the nearest symbol based on the section and the address */
111-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
112- unsigned int secndx, bool allow_negative,
113- Elf_Addr min_distance)
114-{
115- Elf_Sym *sym;
116- Elf_Sym *near = NULL;
117- Elf_Addr sym_addr, distance;
118- bool is_arm = (elf->hdr->e_machine == EM_ARM);
119-
120- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
121- if (get_secindex(elf, sym) != secndx)
122- continue;
123- if (!is_valid_name(elf, sym))
124- continue;
125-
126- sym_addr = sym->st_value;
127-
128- /*
129- * For ARM Thumb instruction, the bit 0 of st_value is set
130- * if the symbol is STT_FUNC type. Mask it to get the address.
131- */
132- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
133- sym_addr &= ~1;
134-
135- if (addr >= sym_addr)
136- distance = addr - sym_addr;
137- else if (allow_negative)
138- distance = sym_addr - addr;
139- else
140- continue;
141-
142- if (distance <= min_distance) {
143- min_distance = distance;
144- near = sym;
145- }
146-
147- if (min_distance == 0)
148- break;
149- }
150- return near;
151-}
152-
153 static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
154 unsigned int secndx)
155 {
156- return find_nearest_sym(elf, addr, secndx, false, ~0);
157+ return symsearch_find_nearest(elf, addr, secndx, false, ~0);
158 }
159
160 static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
161@@ -1119,7 +1060,8 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
162 * Strive to find a better symbol name, but the resulting name may not
163 * match the symbol referenced in the original code.
164 */
165- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20);
166+ return symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
167+ true, 20);
168 }
169
170 static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
171diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
172index 5f94c2c9f2d95..6413f26fcb6b4 100644
173--- a/scripts/mod/modpost.h
174+++ b/scripts/mod/modpost.h
175@@ -10,6 +10,7 @@
176 #include <fcntl.h>
177 #include <unistd.h>
178 #include <elf.h>
179+#include "../../include/linux/module_symbol.h"
180
181 #include "list.h"
182 #include "elfconfig.h"
183@@ -128,6 +129,8 @@ struct elf_info {
184 * take shndx from symtab_shndx_start[N] instead */
185 Elf32_Word *symtab_shndx_start;
186 Elf32_Word *symtab_shndx_stop;
187+
188+ struct symsearch *symsearch;
189 };
190
191 /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
192@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
193 return index;
194 }
195
196+/*
197+ * If there's no name there, ignore it; likewise, ignore it if it's
198+ * one of the magic symbols emitted used by current tools.
199+ *
200+ * Internal symbols created by tools should be ignored by modpost.
201+ */
202+static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
203+{
204+ const char *name = elf->strtab + sym->st_name;
205+
206+ if (!name || !strlen(name))
207+ return 0;
208+ return !is_mapping_symbol(name);
209+}
210+
211+/* symsearch.c */
212+void symsearch_init(struct elf_info *elf);
213+void symsearch_finish(struct elf_info *elf);
214+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
215+ unsigned int secndx, bool allow_negative,
216+ Elf_Addr min_distance);
217+
218 /* file2alias.c */
219 void handle_moddevtable(struct module *mod, struct elf_info *info,
220 Elf_Sym *sym, const char *symname);
221diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
222new file mode 100644
223index 0000000000000..aa4ed51f9960c
224--- /dev/null
225+++ b/scripts/mod/symsearch.c
226@@ -0,0 +1,199 @@
227+// SPDX-License-Identifier: GPL-2.0
228+
229+/*
230+ * Helper functions for finding the symbol in an ELF which is "nearest"
231+ * to a given address.
232+ */
233+
234+#include "modpost.h"
235+
236+struct syminfo {
237+ unsigned int symbol_index;
238+ unsigned int section_index;
239+ Elf_Addr addr;
240+};
241+
242+/*
243+ * Container used to hold an entire binary search table.
244+ * Entries in table are ascending, sorted first by section_index,
245+ * then by addr, and last by symbol_index. The sorting by
246+ * symbol_index is used to ensure predictable behavior when
247+ * multiple symbols are present with the same address; all
248+ * symbols past the first are effectively ignored, by eliding
249+ * them in symsearch_fixup().
250+ */
251+struct symsearch {
252+ unsigned int table_size;
253+ struct syminfo table[];
254+};
255+
256+static int syminfo_compare(const void *s1, const void *s2)
257+{
258+ const struct syminfo *sym1 = s1;
259+ const struct syminfo *sym2 = s2;
260+
261+ if (sym1->section_index > sym2->section_index)
262+ return 1;
263+ if (sym1->section_index < sym2->section_index)
264+ return -1;
265+ if (sym1->addr > sym2->addr)
266+ return 1;
267+ if (sym1->addr < sym2->addr)
268+ return -1;
269+ if (sym1->symbol_index > sym2->symbol_index)
270+ return 1;
271+ if (sym1->symbol_index < sym2->symbol_index)
272+ return -1;
273+ return 0;
274+}
275+
276+static unsigned int symbol_count(struct elf_info *elf)
277+{
278+ unsigned int result = 0;
279+
280+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
281+ if (is_valid_name(elf, sym))
282+ result++;
283+ }
284+ return result;
285+}
286+
287+/*
288+ * Populate the search array that we just allocated.
289+ * Be slightly paranoid here. The ELF file is mmap'd and could
290+ * conceivably change between symbol_count() and symsearch_populate().
291+ * If we notice any difference, bail out rather than potentially
292+ * propagating errors or crashing.
293+ */
294+static void symsearch_populate(struct elf_info *elf,
295+ struct syminfo *table,
296+ unsigned int table_size)
297+{
298+ bool is_arm = (elf->hdr->e_machine == EM_ARM);
299+
300+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
301+ if (is_valid_name(elf, sym)) {
302+ if (table_size-- == 0)
303+ fatal("%s: size mismatch\n", __func__);
304+ table->symbol_index = sym - elf->symtab_start;
305+ table->section_index = get_secindex(elf, sym);
306+ table->addr = sym->st_value;
307+
308+ /*
309+ * For ARM Thumb instruction, the bit 0 of st_value is
310+ * set if the symbol is STT_FUNC type. Mask it to get
311+ * the address.
312+ */
313+ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
314+ table->addr &= ~1;
315+
316+ table++;
317+ }
318+ }
319+
320+ if (table_size != 0)
321+ fatal("%s: size mismatch\n", __func__);
322+}
323+
324+/*
325+ * Do any fixups on the table after sorting.
326+ * For now, this just finds adjacent entries which have
327+ * the same section_index and addr, and it propagates
328+ * the first symbol_index over the subsequent entries,
329+ * so that only one symbol_index is seen for any given
330+ * section_index and addr. This ensures that whether
331+ * we're looking at an address from "above" or "below"
332+ * that we see the same symbol_index.
333+ * This does leave some duplicate entries in the table;
334+ * in practice, these are a small fraction of the
335+ * total number of entries, and they are harmless to
336+ * the binary search algorithm other than a few occasional
337+ * unnecessary comparisons.
338+ */
339+static void symsearch_fixup(struct syminfo *table, unsigned int table_size)
340+{
341+ /* Don't look at index 0, it will never change. */
342+ for (unsigned int i = 1; i < table_size; i++) {
343+ if (table[i].addr == table[i - 1].addr &&
344+ table[i].section_index == table[i - 1].section_index) {
345+ table[i].symbol_index = table[i - 1].symbol_index;
346+ }
347+ }
348+}
349+
350+void symsearch_init(struct elf_info *elf)
351+{
352+ unsigned int table_size = symbol_count(elf);
353+
354+ elf->symsearch = NOFAIL(malloc(sizeof(struct symsearch) +
355+ sizeof(struct syminfo) * table_size));
356+ elf->symsearch->table_size = table_size;
357+
358+ symsearch_populate(elf, elf->symsearch->table, table_size);
359+ qsort(elf->symsearch->table, table_size,
360+ sizeof(struct syminfo), syminfo_compare);
361+
362+ symsearch_fixup(elf->symsearch->table, table_size);
363+}
364+
365+void symsearch_finish(struct elf_info *elf)
366+{
367+ free(elf->symsearch);
368+ elf->symsearch = NULL;
369+}
370+
371+/*
372+ * Find the syminfo which is in secndx and "nearest" to addr.
373+ * allow_negative: allow returning a symbol whose address is > addr.
374+ * min_distance: ignore symbols which are further away than this.
375+ *
376+ * Returns a pointer into the symbol table for success.
377+ * Returns NULL if no legal symbol is found within the requested range.
378+ */
379+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
380+ unsigned int secndx, bool allow_negative,
381+ Elf_Addr min_distance)
382+{
383+ unsigned int hi = elf->symsearch->table_size;
384+ unsigned int lo = 0;
385+ struct syminfo *table = elf->symsearch->table;
386+ struct syminfo target;
387+
388+ target.addr = addr;
389+ target.section_index = secndx;
390+ target.symbol_index = ~0; /* compares greater than any actual index */
391+ while (hi > lo) {
392+ unsigned int mid = lo + (hi - lo) / 2; /* Avoids overflow */
393+
394+ if (syminfo_compare(&table[mid], &target) > 0)
395+ hi = mid;
396+ else
397+ lo = mid + 1;
398+ }
399+
400+ /*
401+ * table[hi], if it exists, is the first entry in the array which
402+ * lies beyond target. table[hi - 1], if it exists, is the last
403+ * entry in the array which comes before target, including the
404+ * case where it perfectly matches the section and the address.
405+ *
406+ * Note -- if the address we're looking up falls perfectly
407+ * in the middle of two symbols, this is written to always
408+ * prefer the symbol with the lower address.
409+ */
410+ Elf_Sym *result = NULL;
411+
412+ if (allow_negative &&
413+ hi < elf->symsearch->table_size &&
414+ table[hi].section_index == secndx &&
415+ table[hi].addr - addr <= min_distance) {
416+ min_distance = table[hi].addr - addr;
417+ result = &elf->symtab_start[table[hi].symbol_index];
418+ }
419+ if (hi > 0 &&
420+ table[hi - 1].section_index == secndx &&
421+ addr - table[hi - 1].addr <= min_distance) {
422+ result = &elf->symtab_start[table[hi - 1].symbol_index];
423+ }
424+ return result;
425+}
426--
4272.43.0
428