]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Mike Travis <travis@sgi.com> |
2 | Subject: kdb: fix stack overflow for large NR_CPUS count | |
3 | References: bnc#440361 | |
4 | ||
5 | V2: remove spinlock | |
6 | ||
7 | In auditing the 2.6.27 source I found a *big* problem in kdb. The stack in | |
8 | kdb_bp will overflow with NR_CPUS=4096 ... the stack is 31744 bytes! | |
9 | ||
10 | ====== Stack (-l 500) | |
11 | ||
12 | 1 - x86_64_default_128 (NR_CPUS=128) | |
13 | 2 - x86_64_default (NR_CPUS=4096) | |
14 | ||
15 | .1. .2. ..final.. | |
16 | 1088 +31744 32832 +2917% kdb_bp | |
17 | 0 +2632 2632 . smp_call_function_mask | |
18 | 0 +2216 2216 . __build_sched_domains | |
19 | 0 +1544 1544 . tick_handle_oneshot_broadcast | |
20 | ||
21 | The problem looks like the static array in kdb_bp (template): | |
22 | ||
23 | static int | |
24 | kdb_bp(int argc, const char **argv) | |
25 | { | |
26 | ... | |
27 | kdb_bp_t template = {0}; | |
28 | ||
29 | Which has a field sized by NR_CPUS: | |
30 | ||
31 | kdbhard_bp_t *bp_hard[NR_CPUS]; /* Hardware breakpoint structure */ | |
32 | ||
33 | When NR_CPUS=4096, the above struct will overflow the 16k stack. | |
34 | ||
35 | The fix I implemented is to move the template to static memory. This struct | |
36 | does not need to be protected because KDB stops all the cpus and only accepts | |
37 | console input from one cpu at a time. | |
38 | ||
39 | Some other minor tweaks to reduce stack size also implemented: | |
40 | ||
41 | kdb_cpu_callback: use set_cpus_allowed_ptr() to prevent passing cpumask_t | |
42 | on the stack. | |
43 | ||
44 | kdb_per_cpu: use cpus_clear which prevents creating an empty cpumask_t | |
45 | (CPU_MASK_NONE) on the stack. | |
46 | ||
47 | ||
48 | For the next release, any arrays should be allocated using nr_cpu_ids. | |
49 | ||
50 | Based on linux-2.6.27. | |
51 | ||
52 | Signed-off-by: Mike Travis <travis@sgi.com> | |
53 | Acked-by: Bernhard Walle <bwalle@suse.de> | |
54 | ||
55 | --- | |
56 | kdb/kdb_bp.c | 27 +++++++++++++++------------ | |
57 | kdb/kdbmain.c | 12 ++++++------ | |
58 | 2 files changed, 21 insertions(+), 18 deletions(-) | |
59 | ||
60 | --- linux-2.6.27.orig/kdb/kdb_bp.c | |
61 | +++ linux-2.6.27/kdb/kdb_bp.c | |
62 | @@ -285,7 +285,7 @@ kdb_bp(int argc, const char **argv) | |
63 | char *symname = NULL; | |
64 | long offset = 0ul; | |
65 | int nextarg; | |
66 | - kdb_bp_t template = {0}; | |
67 | + static kdb_bp_t kdb_bp_template; | |
68 | ||
69 | if (argc == 0) { | |
70 | /* | |
71 | @@ -300,21 +300,23 @@ kdb_bp(int argc, const char **argv) | |
72 | return 0; | |
73 | } | |
74 | ||
75 | - template.bp_global = ((strcmp(argv[0], "bpa") == 0) | |
76 | + memset(&kdb_bp_template, 0, sizeof(kdb_bp_template)); | |
77 | + | |
78 | + kdb_bp_template.bp_global = ((strcmp(argv[0], "bpa") == 0) | |
79 | || (strcmp(argv[0], "bpha") == 0)); | |
80 | - template.bp_forcehw = ((strcmp(argv[0], "bph") == 0) | |
81 | + kdb_bp_template.bp_forcehw = ((strcmp(argv[0], "bph") == 0) | |
82 | || (strcmp(argv[0], "bpha") == 0)); | |
83 | ||
84 | /* Fix me: "bp" is treated as "bpa" to avoid system freeze. -jlan */ | |
85 | if (strcmp(argv[0], "bp") == 0) | |
86 | - template.bp_global = 1; | |
87 | + kdb_bp_template.bp_global = 1; | |
88 | ||
89 | nextarg = 1; | |
90 | - diag = kdbgetaddrarg(argc, argv, &nextarg, &template.bp_addr, | |
91 | + diag = kdbgetaddrarg(argc, argv, &nextarg, &kdb_bp_template.bp_addr, | |
92 | &offset, &symname); | |
93 | if (diag) | |
94 | return diag; | |
95 | - if (!template.bp_addr) | |
96 | + if (!kdb_bp_template.bp_addr) | |
97 | return KDB_BADINT; | |
98 | ||
99 | /* | |
100 | @@ -333,7 +335,7 @@ kdb_bp(int argc, const char **argv) | |
101 | /* | |
102 | * Handle architecture dependent parsing | |
103 | */ | |
104 | - diag = kdba_parsebp(argc, argv, &nextarg, &template); | |
105 | + diag = kdba_parsebp(argc, argv, &nextarg, &kdb_bp_template); | |
106 | if (diag) { | |
107 | return diag; | |
108 | } | |
109 | @@ -348,20 +350,21 @@ kdb_bp(int argc, const char **argv) | |
110 | */ | |
111 | for(i=0,bp_check=kdb_breakpoints; i<KDB_MAXBPT; i++,bp_check++) { | |
112 | if (!bp_check->bp_free && | |
113 | - bp_check->bp_addr == template.bp_addr && | |
114 | + bp_check->bp_addr == kdb_bp_template.bp_addr && | |
115 | (bp_check->bp_global || | |
116 | - bp_check->bp_cpu == template.bp_cpu)) { | |
117 | - kdb_printf("You already have a breakpoint at " kdb_bfd_vma_fmt0 "\n", template.bp_addr); | |
118 | + bp_check->bp_cpu == kdb_bp_template.bp_cpu)) { | |
119 | + kdb_printf("You already have a breakpoint at " | |
120 | + kdb_bfd_vma_fmt0 "\n", kdb_bp_template.bp_addr); | |
121 | return KDB_DUPBPT; | |
122 | } | |
123 | } | |
124 | ||
125 | - template.bp_enabled = 1; | |
126 | + kdb_bp_template.bp_enabled = 1; | |
127 | ||
128 | /* | |
129 | * Actually allocate the breakpoint found earlier | |
130 | */ | |
131 | - *bp = template; | |
132 | + *bp = kdb_bp_template; | |
133 | bp->bp_free = 0; | |
134 | ||
135 | if (!bp->bp_global) { | |
136 | --- linux-2.6.27.orig/kdb/kdbmain.c | |
137 | +++ linux-2.6.27/kdb/kdbmain.c | |
138 | @@ -3715,13 +3715,14 @@ kdb_per_cpu(int argc, const char **argv) | |
139 | { | |
140 | char buf[256], fmtstr[64]; | |
141 | kdb_symtab_t symtab; | |
142 | - cpumask_t suppress = CPU_MASK_NONE; | |
143 | + cpumask_t suppress; | |
144 | int cpu, diag; | |
145 | unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL; | |
146 | ||
147 | if (argc < 1 || argc > 3) | |
148 | return KDB_ARGCOUNT; | |
149 | ||
150 | + cpus_clear(suppress); | |
151 | snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]); | |
152 | if (!kdbgetsymval(buf, &symtab)) { | |
153 | kdb_printf("%s is not a per_cpu variable\n", argv[1]); | |
154 | @@ -3779,7 +3780,7 @@ kdb_per_cpu(int argc, const char **argv) | |
155 | if (cpus_weight(suppress) == 0) | |
156 | return 0; | |
157 | kdb_printf("Zero suppressed cpu(s):"); | |
158 | - for (cpu = first_cpu(suppress); cpu < NR_CPUS; cpu = next_cpu(cpu, suppress)) { | |
159 | + for_each_cpu_mask(cpu, suppress) { | |
160 | kdb_printf(" %d", cpu); | |
161 | if (cpu == NR_CPUS-1 || next_cpu(cpu, suppress) != cpu + 1) | |
162 | continue; | |
163 | @@ -4234,10 +4235,9 @@ kdb_cpu_callback(struct notifier_block * | |
164 | if (action == CPU_ONLINE) { | |
165 | int cpu =(unsigned long)hcpu; | |
166 | cpumask_t save_cpus_allowed = current->cpus_allowed; | |
167 | - cpumask_t new_cpus_allowed = cpumask_of_cpu(cpu); | |
168 | - set_cpus_allowed(current, new_cpus_allowed); | |
169 | - kdb(KDB_REASON_CPU_UP, 0, NULL); /* do kdb setup on this cpu */ | |
170 | - set_cpus_allowed(current, save_cpus_allowed); | |
171 | + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | |
172 | + kdb(KDB_REASON_CPU_UP, 0, NULL); /* do kdb setup on this cpu */ | |
173 | + set_cpus_allowed_ptr(current, &save_cpus_allowed); | |
174 | } | |
175 | return NOTIFY_OK; | |
176 | } |