]>
Commit | Line | Data |
---|---|---|
757bf1df DM |
1 | /* An experimental state machine, for tracking "taint": unsanitized uses |
2 | of data potentially under an attacker's control. | |
3 | ||
4 | Copyright (C) 2019-2020 Free Software Foundation, Inc. | |
5 | Contributed by David Malcolm <dmalcolm@redhat.com>. | |
6 | ||
7 | This file is part of GCC. | |
8 | ||
9 | GCC is free software; you can redistribute it and/or modify it | |
10 | under the terms of the GNU General Public License as published by | |
11 | the Free Software Foundation; either version 3, or (at your option) | |
12 | any later version. | |
13 | ||
14 | GCC is distributed in the hope that it will be useful, but | |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | General Public License for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
20 | along with GCC; see the file COPYING3. If not see | |
21 | <http://www.gnu.org/licenses/>. */ | |
22 | ||
23 | #include "config.h" | |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "tree.h" | |
27 | #include "function.h" | |
28 | #include "basic-block.h" | |
29 | #include "gimple.h" | |
30 | #include "options.h" | |
31 | #include "diagnostic-path.h" | |
32 | #include "diagnostic-metadata.h" | |
33 | #include "function.h" | |
34 | #include "analyzer/analyzer.h" | |
35 | #include "diagnostic-event-id.h" | |
36 | #include "analyzer/analyzer-logging.h" | |
37 | #include "analyzer/sm.h" | |
38 | #include "analyzer/pending-diagnostic.h" | |
39 | ||
40 | #if ENABLE_ANALYZER | |
41 | ||
75038aa6 DM |
42 | namespace ana { |
43 | ||
757bf1df DM |
44 | namespace { |
45 | ||
46 | /* An experimental state machine, for tracking "taint": unsanitized uses | |
47 | of data potentially under an attacker's control. */ | |
48 | ||
49 | class taint_state_machine : public state_machine | |
50 | { | |
51 | public: | |
52 | taint_state_machine (logger *logger); | |
53 | ||
54 | bool inherited_state_p () const FINAL OVERRIDE { return true; } | |
55 | ||
56 | bool on_stmt (sm_context *sm_ctxt, | |
57 | const supernode *node, | |
58 | const gimple *stmt) const FINAL OVERRIDE; | |
59 | ||
60 | void on_condition (sm_context *sm_ctxt, | |
61 | const supernode *node, | |
62 | const gimple *stmt, | |
63 | tree lhs, | |
64 | enum tree_code op, | |
65 | tree rhs) const FINAL OVERRIDE; | |
66 | ||
67 | bool can_purge_p (state_t s) const FINAL OVERRIDE; | |
68 | ||
69 | /* Start state. */ | |
70 | state_t m_start; | |
71 | ||
72 | /* State for a "tainted" value: unsanitized data potentially under an | |
73 | attacker's control. */ | |
74 | state_t m_tainted; | |
75 | ||
76 | /* State for a "tainted" value that has a lower bound. */ | |
77 | state_t m_has_lb; | |
78 | ||
79 | /* State for a "tainted" value that has an upper bound. */ | |
80 | state_t m_has_ub; | |
81 | ||
82 | /* Stop state, for a value we don't want to track any more. */ | |
83 | state_t m_stop; | |
84 | }; | |
85 | ||
/* Which bounds, if any, are known to have been checked on a tainted value
   at the point where it is used.  Note that each enumerator names the
   bound that HAS been checked, so the diagnostic issued for it complains
   about the opposite bound.  */

enum bounds
{
  /* Neither bound has been checked.  */
  BOUNDS_NONE,

  /* An upper bound has been checked; the lower bound is missing.  */
  BOUNDS_UPPER,

  /* A lower bound has been checked; the upper bound is missing.  */
  BOUNDS_LOWER
};
92 | ||
93 | class tainted_array_index | |
94 | : public pending_diagnostic_subclass<tainted_array_index> | |
95 | { | |
96 | public: | |
97 | tainted_array_index (const taint_state_machine &sm, tree arg, | |
98 | enum bounds has_bounds) | |
99 | : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {} | |
100 | ||
101 | const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; } | |
102 | ||
103 | bool operator== (const tainted_array_index &other) const | |
104 | { | |
14f9d7b9 | 105 | return same_tree_p (m_arg, other.m_arg); |
757bf1df DM |
106 | } |
107 | ||
108 | bool emit (rich_location *rich_loc) FINAL OVERRIDE | |
109 | { | |
110 | diagnostic_metadata m; | |
111 | m.add_cwe (129); | |
112 | switch (m_has_bounds) | |
113 | { | |
114 | default: | |
115 | gcc_unreachable (); | |
116 | case BOUNDS_NONE: | |
117 | return warning_at (rich_loc, m, OPT_Wanalyzer_tainted_array_index, | |
118 | "use of tainted value %qE in array lookup" | |
119 | " without bounds checking", | |
120 | m_arg); | |
121 | break; | |
122 | case BOUNDS_UPPER: | |
123 | return warning_at (rich_loc, m, OPT_Wanalyzer_tainted_array_index, | |
124 | "use of tainted value %qE in array lookup" | |
125 | " without lower-bounds checking", | |
126 | m_arg); | |
127 | break; | |
128 | case BOUNDS_LOWER: | |
129 | return warning_at (rich_loc, m, OPT_Wanalyzer_tainted_array_index, | |
130 | "use of tainted value %qE in array lookup" | |
131 | " without upper-bounds checking", | |
132 | m_arg); | |
133 | break; | |
134 | } | |
135 | } | |
136 | ||
137 | label_text describe_state_change (const evdesc::state_change &change) | |
138 | FINAL OVERRIDE | |
139 | { | |
140 | if (change.m_new_state == m_sm.m_tainted) | |
141 | { | |
142 | if (change.m_origin) | |
143 | return change.formatted_print ("%qE has an unchecked value here" | |
144 | " (from %qE)", | |
145 | change.m_expr, change.m_origin); | |
146 | else | |
147 | return change.formatted_print ("%qE gets an unchecked value here", | |
148 | change.m_expr); | |
149 | } | |
150 | else if (change.m_new_state == m_sm.m_has_lb) | |
151 | return change.formatted_print ("%qE has its lower bound checked here", | |
152 | change.m_expr); | |
153 | else if (change.m_new_state == m_sm.m_has_ub) | |
154 | return change.formatted_print ("%qE has its upper bound checked here", | |
155 | change.m_expr); | |
156 | return label_text (); | |
157 | } | |
158 | ||
159 | label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE | |
160 | { | |
161 | switch (m_has_bounds) | |
162 | { | |
163 | default: | |
164 | gcc_unreachable (); | |
165 | case BOUNDS_NONE: | |
166 | return ev.formatted_print ("use of tainted value %qE in array lookup" | |
167 | " without bounds checking", | |
168 | m_arg); | |
169 | case BOUNDS_UPPER: | |
170 | return ev.formatted_print ("use of tainted value %qE in array lookup" | |
171 | " without lower-bounds checking", | |
172 | m_arg); | |
173 | case BOUNDS_LOWER: | |
174 | return ev.formatted_print ("use of tainted value %qE in array lookup" | |
175 | " without upper-bounds checking", | |
176 | m_arg); | |
177 | } | |
178 | } | |
179 | ||
180 | private: | |
181 | const taint_state_machine &m_sm; | |
182 | tree m_arg; | |
183 | enum bounds m_has_bounds; | |
184 | }; | |
185 | ||
186 | /* taint_state_machine's ctor. */ | |
187 | ||
188 | taint_state_machine::taint_state_machine (logger *logger) | |
189 | : state_machine ("taint", logger) | |
190 | { | |
191 | m_start = add_state ("start"); | |
192 | m_tainted = add_state ("tainted"); | |
193 | m_has_lb = add_state ("has_lb"); | |
194 | m_has_ub = add_state ("has_ub"); | |
195 | m_stop = add_state ("stop"); | |
196 | } | |
197 | ||
198 | /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */ | |
199 | ||
200 | bool | |
201 | taint_state_machine::on_stmt (sm_context *sm_ctxt, | |
202 | const supernode *node, | |
203 | const gimple *stmt) const | |
204 | { | |
205 | if (const gcall *call = dyn_cast <const gcall *> (stmt)) | |
206 | if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call)) | |
207 | { | |
208 | if (is_named_call_p (callee_fndecl, "fread", call, 4)) | |
209 | { | |
210 | tree arg = gimple_call_arg (call, 0); | |
211 | arg = sm_ctxt->get_readable_tree (arg); | |
212 | ||
213 | sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted); | |
214 | ||
215 | /* Dereference an ADDR_EXPR. */ | |
216 | // TODO: should the engine do this? | |
217 | if (TREE_CODE (arg) == ADDR_EXPR) | |
218 | sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0), | |
219 | m_start, m_tainted); | |
220 | return true; | |
221 | } | |
222 | } | |
223 | // TODO: ...etc; many other sources of untrusted data | |
224 | ||
225 | if (const gassign *assign = dyn_cast <const gassign *> (stmt)) | |
226 | { | |
227 | tree rhs1 = gimple_assign_rhs1 (assign); | |
228 | enum tree_code op = gimple_assign_rhs_code (assign); | |
229 | ||
230 | /* Check array accesses. */ | |
231 | if (op == ARRAY_REF) | |
232 | { | |
233 | tree arg = TREE_OPERAND (rhs1, 1); | |
234 | arg = sm_ctxt->get_readable_tree (arg); | |
235 | ||
236 | /* Unsigned types have an implicit lower bound. */ | |
237 | bool is_unsigned = false; | |
238 | if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) | |
239 | is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg)); | |
240 | ||
241 | /* Complain about missing bounds. */ | |
242 | sm_ctxt->warn_for_state | |
243 | (node, stmt, arg, m_tainted, | |
244 | new tainted_array_index (*this, arg, | |
245 | is_unsigned | |
246 | ? BOUNDS_LOWER : BOUNDS_NONE)); | |
247 | sm_ctxt->on_transition (node, stmt, arg, m_tainted, m_stop); | |
248 | ||
249 | /* Complain about missing upper bound. */ | |
250 | sm_ctxt->warn_for_state (node, stmt, arg, m_has_lb, | |
251 | new tainted_array_index (*this, arg, | |
252 | BOUNDS_LOWER)); | |
253 | sm_ctxt->on_transition (node, stmt, arg, m_has_lb, m_stop); | |
254 | ||
255 | /* Complain about missing lower bound. */ | |
256 | if (!is_unsigned) | |
257 | { | |
258 | sm_ctxt->warn_for_state (node, stmt, arg, m_has_ub, | |
259 | new tainted_array_index (*this, arg, | |
260 | BOUNDS_UPPER)); | |
261 | sm_ctxt->on_transition (node, stmt, arg, m_has_ub, m_stop); | |
262 | } | |
263 | } | |
264 | } | |
265 | ||
266 | return false; | |
267 | } | |
268 | ||
269 | /* Implementation of state_machine::on_condition vfunc for taint_state_machine. | |
270 | Potentially transition state 'tainted' to 'has_ub' or 'has_lb', | |
271 | and states 'has_ub' and 'has_lb' to 'stop'. */ | |
272 | ||
273 | void | |
274 | taint_state_machine::on_condition (sm_context *sm_ctxt, | |
275 | const supernode *node, | |
276 | const gimple *stmt, | |
277 | tree lhs, | |
278 | enum tree_code op, | |
279 | tree rhs ATTRIBUTE_UNUSED) const | |
280 | { | |
281 | if (stmt == NULL) | |
282 | return; | |
283 | ||
284 | // TODO: this doesn't use the RHS; should we make it symmetric? | |
285 | ||
286 | // TODO | |
287 | switch (op) | |
288 | { | |
289 | //case NE_EXPR: | |
290 | //case EQ_EXPR: | |
291 | case GE_EXPR: | |
292 | case GT_EXPR: | |
293 | { | |
294 | sm_ctxt->on_transition (node, stmt, lhs, m_tainted, | |
295 | m_has_lb); | |
296 | sm_ctxt->on_transition (node, stmt, lhs, m_has_ub, | |
297 | m_stop); | |
298 | } | |
299 | break; | |
300 | case LE_EXPR: | |
301 | case LT_EXPR: | |
302 | { | |
303 | sm_ctxt->on_transition (node, stmt, lhs, m_tainted, | |
304 | m_has_ub); | |
305 | sm_ctxt->on_transition (node, stmt, lhs, m_has_lb, | |
306 | m_stop); | |
307 | } | |
308 | break; | |
309 | default: | |
310 | break; | |
311 | } | |
312 | } | |
313 | ||
314 | bool | |
315 | taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const | |
316 | { | |
317 | return true; | |
318 | } | |
319 | ||
320 | } // anonymous namespace | |
321 | ||
322 | /* Internal interface to this file. */ | |
323 | ||
324 | state_machine * | |
325 | make_taint_state_machine (logger *logger) | |
326 | { | |
327 | return new taint_state_machine (logger); | |
328 | } | |
329 | ||
75038aa6 DM |
330 | } // namespace ana |
331 | ||
757bf1df | 332 | #endif /* #if ENABLE_ANALYZER */ |