]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/analyzer/store.h
466c1830c0f361a89635801494f031841b3476b6
[thirdparty/gcc.git] / gcc / analyzer / store.h
1 /* Classes for modeling the state of memory.
2 Copyright (C) 2020 Free Software Foundation, Inc.
3 Contributed by David Malcolm <dmalcolm@redhat.com>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #ifndef GCC_ANALYZER_STORE_H
22 #define GCC_ANALYZER_STORE_H
23
24 /* Implementation of the region-based ternary model described in:
25 "A Memory Model for Static Analysis of C Programs"
26 (Zhongxing Xu, Ted Kremenek, and Jian Zhang)
27 http://lcs.ios.ac.cn/~xuzb/canalyze/memmodel.pdf */
28
29 /* The store models memory as a collection of "clusters", where regions
30 are partitioned into clusters via their base region.
31
32 For example, given:
33 int a, b, c;
34 struct coord { double x; double y; } verts[3];
35 then "verts[0].y" and "verts[1].x" both have "verts" as their base region.
36 Each of a, b, c, and verts will have their own clusters, so that we
37 know that writes to e.g. "verts[1].x".don't affect e.g. "a".
38
39 Within each cluster we store a map of bindings to values, where the
40 binding keys can be either concrete or symbolic.
41
42 Concrete bindings affect a specific range of bits relative to the start
43 of the base region of the cluster, whereas symbolic bindings affect
44 a specific subregion within the cluster.
45
46 Consider (from the symbolic-1.c testcase):
47
48 char arr[1024];
49 arr[2] = a; (1)
50 arr[3] = b; (2)
51 After (1) and (2), the cluster for "arr" has concrete bindings
52 for bits 16-23 and for bits 24-31, with svalues "INIT_VAL(a)"
53 and "INIT_VAL(b)" respectively:
54 cluster: {bits 16-23: "INIT_VAL(a)",
55 bits 24-31: "INIT_VAL(b)";
56 flags: {}}
57 Attempting to query unbound subregions e.g. arr[4] will
58 return "UNINITIALIZED".
59 "a" and "b" are each in their own clusters, with no explicit
60 bindings, and thus implicitly have value INIT_VAL(a) and INIT_VAL(b).
61
62 arr[3] = c; (3)
63 After (3), the concrete binding for bits 24-31 is replaced with the
64 svalue "INIT_VAL(c)":
65 cluster: {bits 16-23: "INIT_VAL(a)", (from before)
66 bits 24-31: "INIT_VAL(c)"; (updated)
67 flags: {}}
68
69 arr[i] = d; (4)
70 After (4), we lose the concrete bindings and replace them with a
71 symbolic binding for "arr[i]", with svalue "INIT_VAL(d)". We also
72 mark the cluster as having been "symbolically touched": future
73 attempts to query the values of subregions other than "arr[i]",
74 such as "arr[3]" are "UNKNOWN", since we don't know if the write
75 to arr[i] affected them.
76 cluster: {symbolic_key(arr[i]): "INIT_VAL(d)";
77 flags: {TOUCHED}}
78
79 arr[j] = e; (5)
80 After (5), we lose the symbolic binding for "arr[i]" since we could
81 have overwritten it, and add a symbolic binding for "arr[j]".
82 cluster: {symbolic_key(arr[j]): "INIT_VAL(d)"; (different symbolic
83 flags: {TOUCHED}} binding)
84
85 arr[3] = f; (6)
86 After (6), we lose the symbolic binding for "arr[j]" since we could
87 have overwritten it, and gain a concrete binding for bits 24-31
88 again, this time with svalue "INIT_VAL(e)":
89 cluster: {bits 24-31: "INIT_VAL(d)";
90 flags: {TOUCHED}}
91 The cluster is still flagged as touched, so that we know that
92 accesses to other elements are "UNKNOWN" rather than
93 "UNINITIALIZED".
94
95 Handling symbolic regions requires us to handle aliasing.
96
97 In the first example above, each of a, b, c and verts are non-symbolic
98 base regions and so their clusters are "concrete clusters", whereas given:
99 struct coord *p, *q;
100 then "*p" and "*q" are symbolic base regions, and thus "*p" and "*q"
101 have "symbolic clusters".
102
103 In the above, "verts[i].x" will have a symbolic *binding* within a
104 concrete cluster for "verts", whereas "*p" is a symbolic *cluster*.
105
106 Writes to concrete clusters can't affect other concrete clusters,
107 but can affect symbolic clusters; e.g. after:
108 verts[0].x = 42;
109 we bind 42 in the cluster for "verts", but the clusters for "b" and "c"
110 can't be affected. Any symbolic clusters for *p and for *q can be
111 affected, *p and *q could alias verts.
112
113 Writes to a symbolic cluster can affect other clusters, both
114 concrete and symbolic; e.g. after:
115 p->x = 17;
116 we bind 17 within the cluster for "*p". The concrete clusters for a, b,
117 c, and verts could be affected, depending on whether *p aliases them.
118 Similarly, the symbolic cluster to *q could be affected. */
119
120 namespace ana {
121
122 class concrete_binding;
123
124 /* An enum for discriminating between "direct" vs "default" levels of
125 mapping. */
126
127 enum binding_kind
128 {
129 /* Special-case value for hash support.
130 This is the initial entry, so that hash traits can have
131 empty_zero_p = true. */
132 BK_empty = 0,
133
134 /* Special-case value for hash support. */
135 BK_deleted,
136
137 /* The normal kind of mapping. */
138 BK_direct,
139
140 /* A lower-priority kind of mapping, for use when inheriting
141 default values from a parent region. */
142 BK_default
143 };
144
145 extern const char *binding_kind_to_string (enum binding_kind kind);
146
147 /* Abstract base class for describing ranges of bits within a binding_map
148 that can have svalues bound to them. */
149
150 class binding_key
151 {
152 public:
153 virtual ~binding_key () {}
154 virtual bool concrete_p () const = 0;
155 bool symbolic_p () const { return !concrete_p (); }
156
157 static const binding_key *make (store_manager *mgr, const region *r,
158 enum binding_kind kind);
159
160 virtual void dump_to_pp (pretty_printer *pp, bool simple) const;
161 void dump (bool simple) const;
162 label_text get_desc (bool simple=true) const;
163
164 static int cmp_ptrs (const void *, const void *);
165 static int cmp (const binding_key *, const binding_key *);
166
167 virtual const concrete_binding *dyn_cast_concrete_binding () const
168 { return NULL; }
169
170 enum binding_kind get_kind () const { return m_kind; }
171
172 void mark_deleted () { m_kind = BK_deleted; }
173 void mark_empty () { m_kind = BK_empty; }
174 bool is_deleted () const { return m_kind == BK_deleted; }
175 bool is_empty () const { return m_kind == BK_empty; }
176
177 protected:
178 binding_key (enum binding_kind kind) : m_kind (kind) {}
179
180 hashval_t impl_hash () const
181 {
182 return m_kind;
183 }
184 bool impl_eq (const binding_key &other) const
185 {
186 return m_kind == other.m_kind;
187 }
188
189 private:
190 enum binding_kind m_kind;
191 };
192
193 /* Concrete subclass of binding_key, for describing a concrete range of
194 bits within the binding_map (e.g. "bits 8-15"). */
195
196 class concrete_binding : public binding_key
197 {
198 public:
199 /* This class is its own key for the purposes of consolidation. */
200 typedef concrete_binding key_t;
201
202 concrete_binding (bit_offset_t start_bit_offset, bit_size_t size_in_bits,
203 enum binding_kind kind)
204 : binding_key (kind),
205 m_start_bit_offset (start_bit_offset),
206 m_size_in_bits (size_in_bits)
207 {}
208 bool concrete_p () const FINAL OVERRIDE { return true; }
209
210 hashval_t hash () const
211 {
212 inchash::hash hstate;
213 hstate.add_wide_int (m_start_bit_offset);
214 hstate.add_wide_int (m_size_in_bits);
215 return hstate.end () ^ binding_key::impl_hash ();
216 }
217 bool operator== (const concrete_binding &other) const
218 {
219 if (!binding_key::impl_eq (other))
220 return false;
221 return (m_start_bit_offset == other.m_start_bit_offset
222 && m_size_in_bits == other.m_size_in_bits);
223 }
224
225 void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
226
227 const concrete_binding *dyn_cast_concrete_binding () const FINAL OVERRIDE
228 { return this; }
229
230 bit_offset_t get_start_bit_offset () const { return m_start_bit_offset; }
231 bit_size_t get_size_in_bits () const { return m_size_in_bits; }
232 /* Return the next bit offset after the end of this binding. */
233 bit_offset_t get_next_bit_offset () const
234 {
235 return m_start_bit_offset + m_size_in_bits;
236 }
237
238 bool overlaps_p (const concrete_binding &other) const;
239
240 static int cmp_ptr_ptr (const void *, const void *);
241
242 private:
243 bit_offset_t m_start_bit_offset;
244 bit_size_t m_size_in_bits;
245 };
246
247 } // namespace ana
248
249 template <> struct default_hash_traits<ana::concrete_binding>
250 : public member_function_hash_traits<ana::concrete_binding>
251 {
252 static const bool empty_zero_p = true;
253 };
254
255 namespace ana {
256
257 /* Concrete subclass of binding_key, for describing a symbolic set of
258 bits within the binding_map in terms of a region (e.g. "arr[i]"). */
259
260 class symbolic_binding : public binding_key
261 {
262 public:
263 /* This class is its own key for the purposes of consolidation. */
264 typedef symbolic_binding key_t;
265
266 symbolic_binding (const region *region, enum binding_kind kind)
267 : binding_key (kind),
268 m_region (region)
269 {}
270 bool concrete_p () const FINAL OVERRIDE { return false; }
271
272 hashval_t hash () const
273 {
274 return (binding_key::impl_hash () ^ (long)m_region);
275 }
276 bool operator== (const symbolic_binding &other) const
277 {
278 if (!binding_key::impl_eq (other))
279 return false;
280 return (m_region == other.m_region);
281 }
282
283 void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
284
285 const region *get_region () const { return m_region; }
286
287 static int cmp_ptr_ptr (const void *, const void *);
288
289 private:
290 const region *m_region;
291 };
292
293 } // namespace ana
294
295 template <> struct default_hash_traits<ana::symbolic_binding>
296 : public member_function_hash_traits<ana::symbolic_binding>
297 {
298 static const bool empty_zero_p = true;
299 };
300
301 namespace ana {
302
303 /* A mapping from binding_keys to svalues, for use by binding_cluster
304 and compound_svalue. */
305
306 class binding_map
307 {
308 public:
309 typedef hash_map <const binding_key *, const svalue *> map_t;
310 typedef map_t::iterator iterator_t;
311
312 binding_map () : m_map () {}
313 binding_map (const binding_map &other);
314 binding_map& operator=(const binding_map &other);
315
316 bool operator== (const binding_map &other) const;
317 bool operator!= (const binding_map &other) const
318 {
319 return !(*this == other);
320 }
321
322 hashval_t hash () const;
323
324 const svalue *get (const binding_key *key) const
325 {
326 const svalue **slot = const_cast<map_t &> (m_map).get (key);
327 if (slot)
328 return *slot;
329 else
330 return NULL;
331 }
332 bool put (const binding_key *k, const svalue *v)
333 {
334 gcc_assert (v);
335 return m_map.put (k, v);
336 }
337
338 void remove (const binding_key *k) { m_map.remove (k); }
339 void empty () { m_map.empty (); }
340
341 iterator_t begin () const { return m_map.begin (); }
342 iterator_t end () const { return m_map.end (); }
343 size_t elements () const { return m_map.elements (); }
344
345 void dump_to_pp (pretty_printer *pp, bool simple, bool multiline) const;
346 void dump (bool simple) const;
347
348 json::object *to_json () const;
349
350 bool apply_ctor_to_region (const region *parent_reg, tree ctor,
351 region_model_manager *mgr);
352
353 static int cmp (const binding_map &map1, const binding_map &map2);
354
355 private:
356 bool apply_ctor_val_to_range (const region *parent_reg,
357 region_model_manager *mgr,
358 tree min_index, tree max_index,
359 tree val);
360 bool apply_ctor_pair_to_child_region (const region *parent_reg,
361 region_model_manager *mgr,
362 tree index, tree val);
363
364 map_t m_map;
365 };
366
367 /* Concept: BindingVisitor, for use by binding_cluster::for_each_binding
368 and store::for_each_binding.
369
370 Should implement:
371 void on_binding (const binding_key *key, const svalue *&sval);
372 */
373
374 /* All of the bindings within a store for regions that share the same
375 base region. */
376
377 class binding_cluster
378 {
379 public:
380 friend class store;
381
382 typedef hash_map <const binding_key *, const svalue *> map_t;
383 typedef map_t::iterator iterator_t;
384
385 binding_cluster (const region *base_region)
386 : m_base_region (base_region), m_map (),
387 m_escaped (false), m_touched (false) {}
388 binding_cluster (const binding_cluster &other);
389 binding_cluster& operator=(const binding_cluster &other);
390
391 bool operator== (const binding_cluster &other) const;
392 bool operator!= (const binding_cluster &other) const
393 {
394 return !(*this == other);
395 }
396
397 hashval_t hash () const;
398
399 bool symbolic_p () const;
400
401 void dump_to_pp (pretty_printer *pp, bool simple, bool multiline) const;
402 void dump (bool simple) const;
403
404 json::object *to_json () const;
405
406 void bind (store_manager *mgr, const region *, const svalue *,
407 binding_kind kind);
408
409 void clobber_region (store_manager *mgr, const region *reg);
410 void purge_region (store_manager *mgr, const region *reg);
411 void zero_fill_region (store_manager *mgr, const region *reg);
412 void mark_region_as_unknown (store_manager *mgr, const region *reg);
413
414 const svalue *get_binding (store_manager *mgr, const region *reg,
415 binding_kind kind) const;
416 const svalue *get_binding_recursive (store_manager *mgr,
417 const region *reg,
418 enum binding_kind kind) const;
419 const svalue *get_any_binding (store_manager *mgr,
420 const region *reg) const;
421 const svalue *maybe_get_compound_binding (store_manager *mgr,
422 const region *reg) const;
423
424 void remove_overlapping_bindings (store_manager *mgr, const region *reg);
425
426 template <typename T>
427 void for_each_value (void (*cb) (const svalue *sval, T user_data),
428 T user_data)
429 {
430 for (map_t::iterator iter = m_map.begin (); iter != m_map.end (); ++iter)
431 cb ((*iter).second, user_data);
432 }
433
434 static bool can_merge_p (const binding_cluster *cluster_a,
435 const binding_cluster *cluster_b,
436 binding_cluster *out_cluster,
437 store_manager *mgr,
438 model_merger *merger);
439 void make_unknown_relative_to (const binding_cluster *other_cluster,
440 store_manager *mgr);
441
442 void mark_as_escaped ();
443 void on_unknown_fncall (const gcall *call, store_manager *mgr);
444
445 bool escaped_p () const { return m_escaped; }
446 bool touched_p () const { return m_touched; }
447
448 bool redundant_p () const;
449 bool empty_p () const { return m_map.elements () == 0; }
450
451 void get_representative_path_vars (const region_model *model,
452 svalue_set *visited,
453 const region *base_reg,
454 const svalue *sval,
455 auto_vec<path_var> *out_pvs) const;
456
457 const svalue *maybe_get_simple_value (store_manager *mgr) const;
458
459 template <typename BindingVisitor>
460 void for_each_binding (BindingVisitor &v)
461 {
462 for (map_t::iterator iter = m_map.begin (); iter != m_map.end (); ++iter)
463 {
464 const binding_key *key = (*iter).first;
465 const svalue *&sval = (*iter).second;
466 v.on_binding (key, sval);
467 }
468 }
469
470 iterator_t begin () const { return m_map.begin (); }
471 iterator_t end () const { return m_map.end (); }
472
473 const binding_map &get_map () const { return m_map; }
474
475 private:
476 const svalue *get_any_value (const binding_key *key) const;
477 void get_overlapping_bindings (store_manager *mgr, const region *reg,
478 auto_vec<const binding_key *> *out);
479 void bind_compound_sval (store_manager *mgr,
480 const region *reg,
481 const compound_svalue *compound_sval);
482 void bind_key (const binding_key *key, const svalue *sval);
483
484 const region *m_base_region;
485
486 binding_map m_map;
487
488 /* Has a pointer to this cluster "escaped" into a part of the program
489 we don't know about (via a call to a function with an unknown body,
490 or by being passed in as a pointer param of a "top-level" function call).
491 Such regions could be overwritten when other such functions are called,
492 even if the region is no longer reachable by pointers that we are
493 tracking. */
494 bool m_escaped;
495
496 /* Has this cluster been written to via a symbolic binding?
497 If so, then we don't know anything about unbound subregions,
498 so we can't use initial_svalue, treat them as uninitialized, or
499 inherit values from a parent region. */
500 bool m_touched;
501 };
502
503 /* The mapping from regions to svalues.
504 This is actually expressed by subdividing into clusters, to better
505 handle aliasing. */
506
507 class store
508 {
509 public:
510 typedef hash_map <const region *, binding_cluster *> cluster_map_t;
511
512 store ();
513 store (const store &other);
514 ~store ();
515
516 store &operator= (const store &other);
517
518 bool operator== (const store &other) const;
519 bool operator!= (const store &other) const
520 {
521 return !(*this == other);
522 }
523
524 hashval_t hash () const;
525
526 void dump_to_pp (pretty_printer *pp, bool summarize, bool multiline,
527 store_manager *mgr) const;
528 void dump (bool simple) const;
529 void summarize_to_pp (pretty_printer *pp, bool simple) const;
530
531 json::object *to_json () const;
532
533 const svalue *get_direct_binding (store_manager *mgr, const region *reg);
534 const svalue *get_default_binding (store_manager *mgr, const region *reg);
535 const svalue *get_any_binding (store_manager *mgr, const region *reg) const;
536
537 bool called_unknown_fn_p () const { return m_called_unknown_fn; }
538
539 void set_value (store_manager *mgr, const region *lhs_reg,
540 const svalue *rhs_sval, enum binding_kind kind);
541 void clobber_region (store_manager *mgr, const region *reg);
542 void purge_region (store_manager *mgr, const region *reg);
543 void zero_fill_region (store_manager *mgr, const region *reg);
544 void mark_region_as_unknown (store_manager *mgr, const region *reg);
545
546 const binding_cluster *get_cluster (const region *base_reg) const;
547 binding_cluster *get_cluster (const region *base_reg);
548 binding_cluster *get_or_create_cluster (const region *base_reg);
549 void purge_cluster (const region *base_reg);
550
551 template <typename T>
552 void for_each_cluster (void (*cb) (const region *base_reg, T user_data),
553 T user_data) const
554 {
555 for (cluster_map_t::iterator iter = m_cluster_map.begin ();
556 iter != m_cluster_map.end (); ++iter)
557 cb ((*iter).first, user_data);
558 }
559
560 static bool can_merge_p (const store *store_a, const store *store_b,
561 store *out_store, store_manager *mgr,
562 model_merger *merger);
563
564 void mark_as_escaped (const region *base_reg);
565 void on_unknown_fncall (const gcall *call, store_manager *mgr);
566 bool escaped_p (const region *reg) const;
567
568 void get_representative_path_vars (const region_model *model,
569 svalue_set *visited,
570 const svalue *sval,
571 auto_vec<path_var> *out_pvs) const;
572
573 cluster_map_t::iterator begin () const { return m_cluster_map.begin (); }
574 cluster_map_t::iterator end () const { return m_cluster_map.end (); }
575
576 tristate eval_alias (const region *base_reg_a,
577 const region *base_reg_b) const;
578
579 template <typename BindingVisitor>
580 void for_each_binding (BindingVisitor &v)
581 {
582 for (cluster_map_t::iterator iter = m_cluster_map.begin ();
583 iter != m_cluster_map.end (); ++iter)
584 (*iter).second->for_each_binding (v);
585 }
586
587 void canonicalize (store_manager *mgr);
588 void loop_replay_fixup (const store *other_store,
589 region_model_manager *mgr);
590
591 private:
592 void remove_overlapping_bindings (store_manager *mgr, const region *reg);
593 tristate eval_alias_1 (const region *base_reg_a,
594 const region *base_reg_b) const;
595
596 cluster_map_t m_cluster_map;
597
598 /* If this is true, then unknown code has been called, and so
599 any global variable that isn't currently modelled by the store
600 has unknown state, rather than being in an "initial state".
601 This is to avoid having to mark (and thus explicitly track)
602 every global when an unknown function is called; instead, they
603 can be tracked implicitly. */
604 bool m_called_unknown_fn;
605 };
606
607 /* A class responsible for owning and consolidating binding keys
608 (both concrete and symbolic).
609 Key instances are immutable as far as clients are concerned, so they
610 are provided as "const" ptrs. */
611
612 class store_manager
613 {
614 public:
615 store_manager (region_model_manager *mgr) : m_mgr (mgr) {}
616
617 /* binding consolidation. */
618 const concrete_binding *
619 get_concrete_binding (bit_offset_t start_bit_offset,
620 bit_offset_t size_in_bits,
621 enum binding_kind kind);
622 const symbolic_binding *
623 get_symbolic_binding (const region *region,
624 enum binding_kind kind);
625
626 region_model_manager *get_svalue_manager () const
627 {
628 return m_mgr;
629 }
630
631 void log_stats (logger *logger, bool show_objs) const;
632
633 private:
634 region_model_manager *m_mgr;
635 consolidation_map<concrete_binding> m_concrete_binding_key_mgr;
636 consolidation_map<symbolic_binding> m_symbolic_binding_key_mgr;
637 };
638
639 } // namespace ana
640
641 #endif /* GCC_ANALYZER_STORE_H */