]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/analyzer/store.h
analyzer: const fixes [PR98679]
[thirdparty/gcc.git] / gcc / analyzer / store.h
1 /* Classes for modeling the state of memory.
2 Copyright (C) 2020-2021 Free Software Foundation, Inc.
3 Contributed by David Malcolm <dmalcolm@redhat.com>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #ifndef GCC_ANALYZER_STORE_H
22 #define GCC_ANALYZER_STORE_H
23
24 /* Implementation of the region-based ternary model described in:
25 "A Memory Model for Static Analysis of C Programs"
26 (Zhongxing Xu, Ted Kremenek, and Jian Zhang)
27 http://lcs.ios.ac.cn/~xuzb/canalyze/memmodel.pdf */
28
29 /* The store models memory as a collection of "clusters", where regions
30 are partitioned into clusters via their base region.
31
32 For example, given:
33 int a, b, c;
34 struct coord { double x; double y; } verts[3];
35 then "verts[0].y" and "verts[1].x" both have "verts" as their base region.
36 Each of a, b, c, and verts will have their own clusters, so that we
37 know that writes to e.g. "verts[1].x".don't affect e.g. "a".
38
39 Within each cluster we store a map of bindings to values, where the
40 binding keys can be either concrete or symbolic.
41
42 Concrete bindings affect a specific range of bits relative to the start
43 of the base region of the cluster, whereas symbolic bindings affect
44 a specific subregion within the cluster.
45
46 Consider (from the symbolic-1.c testcase):
47
48 char arr[1024];
49 arr[2] = a; (1)
50 arr[3] = b; (2)
51 After (1) and (2), the cluster for "arr" has concrete bindings
52 for bits 16-23 and for bits 24-31, with svalues "INIT_VAL(a)"
53 and "INIT_VAL(b)" respectively:
54 cluster: {bits 16-23: "INIT_VAL(a)",
55 bits 24-31: "INIT_VAL(b)";
56 flags: {}}
57 Attempting to query unbound subregions e.g. arr[4] will
58 return "UNINITIALIZED".
59 "a" and "b" are each in their own clusters, with no explicit
60 bindings, and thus implicitly have value INIT_VAL(a) and INIT_VAL(b).
61
62 arr[3] = c; (3)
63 After (3), the concrete binding for bits 24-31 is replaced with the
64 svalue "INIT_VAL(c)":
65 cluster: {bits 16-23: "INIT_VAL(a)", (from before)
66 bits 24-31: "INIT_VAL(c)"; (updated)
67 flags: {}}
68
69 arr[i] = d; (4)
70 After (4), we lose the concrete bindings and replace them with a
71 symbolic binding for "arr[i]", with svalue "INIT_VAL(d)". We also
72 mark the cluster as having been "symbolically touched": future
73 attempts to query the values of subregions other than "arr[i]",
74 such as "arr[3]" are "UNKNOWN", since we don't know if the write
75 to arr[i] affected them.
76 cluster: {symbolic_key(arr[i]): "INIT_VAL(d)";
77 flags: {TOUCHED}}
78
79 arr[j] = e; (5)
80 After (5), we lose the symbolic binding for "arr[i]" since we could
81 have overwritten it, and add a symbolic binding for "arr[j]".
82 cluster: {symbolic_key(arr[j]): "INIT_VAL(d)"; (different symbolic
83 flags: {TOUCHED}} binding)
84
85 arr[3] = f; (6)
86 After (6), we lose the symbolic binding for "arr[j]" since we could
87 have overwritten it, and gain a concrete binding for bits 24-31
88 again, this time with svalue "INIT_VAL(e)":
89 cluster: {bits 24-31: "INIT_VAL(d)";
90 flags: {TOUCHED}}
91 The cluster is still flagged as touched, so that we know that
92 accesses to other elements are "UNKNOWN" rather than
93 "UNINITIALIZED".
94
95 Handling symbolic regions requires us to handle aliasing.
96
97 In the first example above, each of a, b, c and verts are non-symbolic
98 base regions and so their clusters are "concrete clusters", whereas given:
99 struct coord *p, *q;
100 then "*p" and "*q" are symbolic base regions, and thus "*p" and "*q"
101 have "symbolic clusters".
102
103 In the above, "verts[i].x" will have a symbolic *binding* within a
104 concrete cluster for "verts", whereas "*p" is a symbolic *cluster*.
105
106 Writes to concrete clusters can't affect other concrete clusters,
107 but can affect symbolic clusters; e.g. after:
108 verts[0].x = 42;
109 we bind 42 in the cluster for "verts", but the clusters for "b" and "c"
110 can't be affected. Any symbolic clusters for *p and for *q can be
111 affected, *p and *q could alias verts.
112
113 Writes to a symbolic cluster can affect other clusters, both
114 concrete and symbolic; e.g. after:
115 p->x = 17;
116 we bind 17 within the cluster for "*p". The concrete clusters for a, b,
117 c, and verts could be affected, depending on whether *p aliases them.
118 Similarly, the symbolic cluster to *q could be affected. */
119
120 namespace ana {
121
122 class concrete_binding;
123
124 /* An enum for discriminating between "direct" vs "default" levels of
125 mapping. */
126
127 enum binding_kind
128 {
129 /* Special-case value for hash support.
130 This is the initial entry, so that hash traits can have
131 empty_zero_p = true. */
132 BK_empty = 0,
133
134 /* Special-case value for hash support. */
135 BK_deleted,
136
137 /* The normal kind of mapping. */
138 BK_direct,
139
140 /* A lower-priority kind of mapping, for use when inheriting
141 default values from a parent region. */
142 BK_default
143 };
144
145 extern const char *binding_kind_to_string (enum binding_kind kind);
146
147 /* Abstract base class for describing ranges of bits within a binding_map
148 that can have svalues bound to them. */
149
150 class binding_key
151 {
152 public:
153 virtual ~binding_key () {}
154 virtual bool concrete_p () const = 0;
155 bool symbolic_p () const { return !concrete_p (); }
156
157 static const binding_key *make (store_manager *mgr, const region *r,
158 enum binding_kind kind);
159
160 virtual void dump_to_pp (pretty_printer *pp, bool simple) const;
161 void dump (bool simple) const;
162 label_text get_desc (bool simple=true) const;
163
164 static int cmp_ptrs (const void *, const void *);
165 static int cmp (const binding_key *, const binding_key *);
166
167 virtual const concrete_binding *dyn_cast_concrete_binding () const
168 { return NULL; }
169
170 enum binding_kind get_kind () const { return m_kind; }
171
172 void mark_deleted () { m_kind = BK_deleted; }
173 void mark_empty () { m_kind = BK_empty; }
174 bool is_deleted () const { return m_kind == BK_deleted; }
175 bool is_empty () const { return m_kind == BK_empty; }
176
177 protected:
178 binding_key (enum binding_kind kind) : m_kind (kind) {}
179
180 hashval_t impl_hash () const
181 {
182 return m_kind;
183 }
184 bool impl_eq (const binding_key &other) const
185 {
186 return m_kind == other.m_kind;
187 }
188
189 private:
190 enum binding_kind m_kind;
191 };
192
193 /* Concrete subclass of binding_key, for describing a concrete range of
194 bits within the binding_map (e.g. "bits 8-15"). */
195
196 class concrete_binding : public binding_key
197 {
198 public:
199 /* This class is its own key for the purposes of consolidation. */
200 typedef concrete_binding key_t;
201
202 concrete_binding (bit_offset_t start_bit_offset, bit_size_t size_in_bits,
203 enum binding_kind kind)
204 : binding_key (kind),
205 m_start_bit_offset (start_bit_offset),
206 m_size_in_bits (size_in_bits)
207 {}
208 bool concrete_p () const FINAL OVERRIDE { return true; }
209
210 hashval_t hash () const
211 {
212 inchash::hash hstate;
213 hstate.add_wide_int (m_start_bit_offset);
214 hstate.add_wide_int (m_size_in_bits);
215 return hstate.end () ^ binding_key::impl_hash ();
216 }
217 bool operator== (const concrete_binding &other) const
218 {
219 if (!binding_key::impl_eq (other))
220 return false;
221 return (m_start_bit_offset == other.m_start_bit_offset
222 && m_size_in_bits == other.m_size_in_bits);
223 }
224
225 void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
226
227 const concrete_binding *dyn_cast_concrete_binding () const FINAL OVERRIDE
228 { return this; }
229
230 bit_offset_t get_start_bit_offset () const { return m_start_bit_offset; }
231 bit_size_t get_size_in_bits () const { return m_size_in_bits; }
232 /* Return the next bit offset after the end of this binding. */
233 bit_offset_t get_next_bit_offset () const
234 {
235 return m_start_bit_offset + m_size_in_bits;
236 }
237
238 bool overlaps_p (const concrete_binding &other) const;
239
240 static int cmp_ptr_ptr (const void *, const void *);
241
242 private:
243 bit_offset_t m_start_bit_offset;
244 bit_size_t m_size_in_bits;
245 };
246
247 } // namespace ana
248
249 template <> struct default_hash_traits<ana::concrete_binding>
250 : public member_function_hash_traits<ana::concrete_binding>
251 {
252 static const bool empty_zero_p = true;
253 };
254
255 namespace ana {
256
257 /* Concrete subclass of binding_key, for describing a symbolic set of
258 bits within the binding_map in terms of a region (e.g. "arr[i]"). */
259
260 class symbolic_binding : public binding_key
261 {
262 public:
263 /* This class is its own key for the purposes of consolidation. */
264 typedef symbolic_binding key_t;
265
266 symbolic_binding (const region *region, enum binding_kind kind)
267 : binding_key (kind),
268 m_region (region)
269 {}
270 bool concrete_p () const FINAL OVERRIDE { return false; }
271
272 hashval_t hash () const
273 {
274 return (binding_key::impl_hash () ^ (intptr_t)m_region);
275 }
276 bool operator== (const symbolic_binding &other) const
277 {
278 if (!binding_key::impl_eq (other))
279 return false;
280 return (m_region == other.m_region);
281 }
282
283 void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
284
285 const region *get_region () const { return m_region; }
286
287 static int cmp_ptr_ptr (const void *, const void *);
288
289 private:
290 const region *m_region;
291 };
292
293 } // namespace ana
294
295 template <> struct default_hash_traits<ana::symbolic_binding>
296 : public member_function_hash_traits<ana::symbolic_binding>
297 {
298 static const bool empty_zero_p = true;
299 };
300
301 namespace ana {
302
303 /* A mapping from binding_keys to svalues, for use by binding_cluster
304 and compound_svalue. */
305
306 class binding_map
307 {
308 public:
309 typedef hash_map <const binding_key *, const svalue *> map_t;
310 typedef map_t::iterator iterator_t;
311
312 binding_map () : m_map () {}
313 binding_map (const binding_map &other);
314 binding_map& operator=(const binding_map &other);
315
316 bool operator== (const binding_map &other) const;
317 bool operator!= (const binding_map &other) const
318 {
319 return !(*this == other);
320 }
321
322 hashval_t hash () const;
323
324 const svalue *get (const binding_key *key) const
325 {
326 const svalue **slot = const_cast<map_t &> (m_map).get (key);
327 if (slot)
328 return *slot;
329 else
330 return NULL;
331 }
332 bool put (const binding_key *k, const svalue *v)
333 {
334 gcc_assert (v);
335 return m_map.put (k, v);
336 }
337
338 void remove (const binding_key *k) { m_map.remove (k); }
339 void empty () { m_map.empty (); }
340
341 iterator_t begin () const { return m_map.begin (); }
342 iterator_t end () const { return m_map.end (); }
343 size_t elements () const { return m_map.elements (); }
344
345 void dump_to_pp (pretty_printer *pp, bool simple, bool multiline) const;
346 void dump (bool simple) const;
347
348 json::object *to_json () const;
349
350 bool apply_ctor_to_region (const region *parent_reg, tree ctor,
351 region_model_manager *mgr);
352
353 static int cmp (const binding_map &map1, const binding_map &map2);
354
355 private:
356 bool apply_ctor_val_to_range (const region *parent_reg,
357 region_model_manager *mgr,
358 tree min_index, tree max_index,
359 tree val);
360 bool apply_ctor_pair_to_child_region (const region *parent_reg,
361 region_model_manager *mgr,
362 tree index, tree val);
363
364 map_t m_map;
365 };
366
367 /* Concept: BindingVisitor, for use by binding_cluster::for_each_binding
368 and store::for_each_binding.
369
370 Should implement:
371 void on_binding (const binding_key *key, const svalue *&sval);
372 */
373
374 /* All of the bindings within a store for regions that share the same
375 base region. */
376
377 class binding_cluster
378 {
379 public:
380 friend class store;
381
382 typedef hash_map <const binding_key *, const svalue *> map_t;
383 typedef map_t::iterator iterator_t;
384
385 binding_cluster (const region *base_region)
386 : m_base_region (base_region), m_map (),
387 m_escaped (false), m_touched (false) {}
388 binding_cluster (const binding_cluster &other);
389 binding_cluster& operator=(const binding_cluster &other);
390
391 bool operator== (const binding_cluster &other) const;
392 bool operator!= (const binding_cluster &other) const
393 {
394 return !(*this == other);
395 }
396
397 hashval_t hash () const;
398
399 bool symbolic_p () const;
400
401 void dump_to_pp (pretty_printer *pp, bool simple, bool multiline) const;
402 void dump (bool simple) const;
403
404 json::object *to_json () const;
405
406 void bind (store_manager *mgr, const region *, const svalue *,
407 binding_kind kind);
408
409 void clobber_region (store_manager *mgr, const region *reg);
410 void purge_region (store_manager *mgr, const region *reg);
411 void zero_fill_region (store_manager *mgr, const region *reg);
412 void mark_region_as_unknown (store_manager *mgr, const region *reg);
413
414 const svalue *get_binding (store_manager *mgr, const region *reg,
415 binding_kind kind) const;
416 const svalue *get_binding_recursive (store_manager *mgr,
417 const region *reg,
418 enum binding_kind kind) const;
419 const svalue *get_any_binding (store_manager *mgr,
420 const region *reg) const;
421 const svalue *maybe_get_compound_binding (store_manager *mgr,
422 const region *reg) const;
423
424 void remove_overlapping_bindings (store_manager *mgr, const region *reg);
425
426 template <typename T>
427 void for_each_value (void (*cb) (const svalue *sval, T user_data),
428 T user_data) const
429 {
430 for (map_t::iterator iter = m_map.begin (); iter != m_map.end (); ++iter)
431 cb ((*iter).second, user_data);
432 }
433
434 static bool can_merge_p (const binding_cluster *cluster_a,
435 const binding_cluster *cluster_b,
436 binding_cluster *out_cluster,
437 store *out_store,
438 store_manager *mgr,
439 model_merger *merger);
440 void make_unknown_relative_to (const binding_cluster *other_cluster,
441 store *out_store,
442 store_manager *mgr);
443
444 void mark_as_escaped ();
445 void on_unknown_fncall (const gcall *call, store_manager *mgr);
446
447 bool escaped_p () const { return m_escaped; }
448 bool touched_p () const { return m_touched; }
449
450 bool redundant_p () const;
451 bool empty_p () const { return m_map.elements () == 0; }
452
453 void get_representative_path_vars (const region_model *model,
454 svalue_set *visited,
455 const region *base_reg,
456 const svalue *sval,
457 auto_vec<path_var> *out_pvs) const;
458
459 const svalue *maybe_get_simple_value (store_manager *mgr) const;
460
461 template <typename BindingVisitor>
462 void for_each_binding (BindingVisitor &v) const
463 {
464 for (map_t::iterator iter = m_map.begin (); iter != m_map.end (); ++iter)
465 {
466 const binding_key *key = (*iter).first;
467 const svalue *&sval = (*iter).second;
468 v.on_binding (key, sval);
469 }
470 }
471
472 iterator_t begin () const { return m_map.begin (); }
473 iterator_t end () const { return m_map.end (); }
474
475 const binding_map &get_map () const { return m_map; }
476
477 private:
478 const svalue *get_any_value (const binding_key *key) const;
479 void get_overlapping_bindings (store_manager *mgr, const region *reg,
480 auto_vec<const binding_key *> *out);
481 void bind_compound_sval (store_manager *mgr,
482 const region *reg,
483 const compound_svalue *compound_sval);
484 void bind_key (const binding_key *key, const svalue *sval);
485
486 const region *m_base_region;
487
488 binding_map m_map;
489
490 /* Has a pointer to this cluster "escaped" into a part of the program
491 we don't know about (via a call to a function with an unknown body,
492 or by being passed in as a pointer param of a "top-level" function call).
493 Such regions could be overwritten when other such functions are called,
494 even if the region is no longer reachable by pointers that we are
495 tracking. */
496 bool m_escaped;
497
498 /* Has this cluster been written to via a symbolic binding?
499 If so, then we don't know anything about unbound subregions,
500 so we can't use initial_svalue, treat them as uninitialized, or
501 inherit values from a parent region. */
502 bool m_touched;
503 };
504
505 /* The mapping from regions to svalues.
506 This is actually expressed by subdividing into clusters, to better
507 handle aliasing. */
508
509 class store
510 {
511 public:
512 typedef hash_map <const region *, binding_cluster *> cluster_map_t;
513
514 store ();
515 store (const store &other);
516 ~store ();
517
518 store &operator= (const store &other);
519
520 bool operator== (const store &other) const;
521 bool operator!= (const store &other) const
522 {
523 return !(*this == other);
524 }
525
526 hashval_t hash () const;
527
528 void dump_to_pp (pretty_printer *pp, bool summarize, bool multiline,
529 store_manager *mgr) const;
530 void dump (bool simple) const;
531 void summarize_to_pp (pretty_printer *pp, bool simple) const;
532
533 json::object *to_json () const;
534
535 const svalue *get_direct_binding (store_manager *mgr, const region *reg);
536 const svalue *get_default_binding (store_manager *mgr, const region *reg);
537 const svalue *get_any_binding (store_manager *mgr, const region *reg) const;
538
539 bool called_unknown_fn_p () const { return m_called_unknown_fn; }
540
541 void set_value (store_manager *mgr, const region *lhs_reg,
542 const svalue *rhs_sval, enum binding_kind kind);
543 void clobber_region (store_manager *mgr, const region *reg);
544 void purge_region (store_manager *mgr, const region *reg);
545 void zero_fill_region (store_manager *mgr, const region *reg);
546 void mark_region_as_unknown (store_manager *mgr, const region *reg);
547
548 const binding_cluster *get_cluster (const region *base_reg) const;
549 binding_cluster *get_cluster (const region *base_reg);
550 binding_cluster *get_or_create_cluster (const region *base_reg);
551 void purge_cluster (const region *base_reg);
552
553 template <typename T>
554 void for_each_cluster (void (*cb) (const region *base_reg, T user_data),
555 T user_data) const
556 {
557 for (cluster_map_t::iterator iter = m_cluster_map.begin ();
558 iter != m_cluster_map.end (); ++iter)
559 cb ((*iter).first, user_data);
560 }
561
562 static bool can_merge_p (const store *store_a, const store *store_b,
563 store *out_store, store_manager *mgr,
564 model_merger *merger);
565
566 void mark_as_escaped (const region *base_reg);
567 void on_unknown_fncall (const gcall *call, store_manager *mgr);
568 bool escaped_p (const region *reg) const;
569
570 void get_representative_path_vars (const region_model *model,
571 svalue_set *visited,
572 const svalue *sval,
573 auto_vec<path_var> *out_pvs) const;
574
575 cluster_map_t::iterator begin () const { return m_cluster_map.begin (); }
576 cluster_map_t::iterator end () const { return m_cluster_map.end (); }
577
578 tristate eval_alias (const region *base_reg_a,
579 const region *base_reg_b) const;
580
581 template <typename BindingVisitor>
582 void for_each_binding (BindingVisitor &v)
583 {
584 for (cluster_map_t::iterator iter = m_cluster_map.begin ();
585 iter != m_cluster_map.end (); ++iter)
586 (*iter).second->for_each_binding (v);
587 }
588
589 void canonicalize (store_manager *mgr);
590 void loop_replay_fixup (const store *other_store,
591 region_model_manager *mgr);
592
593 private:
594 void remove_overlapping_bindings (store_manager *mgr, const region *reg);
595 tristate eval_alias_1 (const region *base_reg_a,
596 const region *base_reg_b) const;
597
598 cluster_map_t m_cluster_map;
599
600 /* If this is true, then unknown code has been called, and so
601 any global variable that isn't currently modelled by the store
602 has unknown state, rather than being in an "initial state".
603 This is to avoid having to mark (and thus explicitly track)
604 every global when an unknown function is called; instead, they
605 can be tracked implicitly. */
606 bool m_called_unknown_fn;
607 };
608
609 /* A class responsible for owning and consolidating binding keys
610 (both concrete and symbolic).
611 Key instances are immutable as far as clients are concerned, so they
612 are provided as "const" ptrs. */
613
614 class store_manager
615 {
616 public:
617 store_manager (region_model_manager *mgr) : m_mgr (mgr) {}
618
619 /* binding consolidation. */
620 const concrete_binding *
621 get_concrete_binding (bit_offset_t start_bit_offset,
622 bit_offset_t size_in_bits,
623 enum binding_kind kind);
624 const symbolic_binding *
625 get_symbolic_binding (const region *region,
626 enum binding_kind kind);
627
628 region_model_manager *get_svalue_manager () const
629 {
630 return m_mgr;
631 }
632
633 void log_stats (logger *logger, bool show_objs) const;
634
635 private:
636 region_model_manager *m_mgr;
637 consolidation_map<concrete_binding> m_concrete_binding_key_mgr;
638 consolidation_map<symbolic_binding> m_symbolic_binding_key_mgr;
639 };
640
641 } // namespace ana
642
643 #endif /* GCC_ANALYZER_STORE_H */