1 /* Classes for modeling the state of memory.
2 Copyright (C) 2020 Free Software Foundation, Inc.
3 Contributed by David Malcolm <dmalcolm@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #ifndef GCC_ANALYZER_STORE_H
22 #define GCC_ANALYZER_STORE_H
24 /* Implementation of the region-based ternary model described in:
25 "A Memory Model for Static Analysis of C Programs"
26 (Zhongxing Xu, Ted Kremenek, and Jian Zhang)
27 http://lcs.ios.ac.cn/~xuzb/canalyze/memmodel.pdf */
29 /* The store models memory as a collection of "clusters", where regions
30 are partitioned into clusters via their base region.
34 struct coord { double x; double y; } verts[3];
35 then "verts[0].y" and "verts[1].x" both have "verts" as their base region.
36 Each of a, b, c, and verts will have their own clusters, so that we
37 know that writes to e.g. "verts[1].x".don't affect e.g. "a".
39 Within each cluster we store a map of bindings to values, where the
40 binding keys can be either concrete or symbolic.
42 Concrete bindings affect a specific range of bits relative to the start
43 of the base region of the cluster, whereas symbolic bindings affect
44 a specific subregion within the cluster.
46 Consider (from the symbolic-1.c testcase):
51 After (1) and (2), the cluster for "arr" has concrete bindings
52 for bits 16-23 and for bits 24-31, with svalues "INIT_VAL(a)"
53 and "INIT_VAL(b)" respectively:
54 cluster: {bits 16-23: "INIT_VAL(a)",
55 bits 24-31: "INIT_VAL(b)";
57 Attempting to query unbound subregions e.g. arr[4] will
58 return "UNINITIALIZED".
59 "a" and "b" are each in their own clusters, with no explicit
60 bindings, and thus implicitly have value INIT_VAL(a) and INIT_VAL(b).
63 After (3), the concrete binding for bits 24-31 is replaced with the
65 cluster: {bits 16-23: "INIT_VAL(a)", (from before)
66 bits 24-31: "INIT_VAL(c)"; (updated)
70 After (4), we lose the concrete bindings and replace them with a
71 symbolic binding for "arr[i]", with svalue "INIT_VAL(d)". We also
72 mark the cluster as having been "symbolically touched": future
73 attempts to query the values of subregions other than "arr[i]",
74 such as "arr[3]" are "UNKNOWN", since we don't know if the write
75 to arr[i] affected them.
76 cluster: {symbolic_key(arr[i]): "INIT_VAL(d)";
80 After (5), we lose the symbolic binding for "arr[i]" since we could
81 have overwritten it, and add a symbolic binding for "arr[j]".
82 cluster: {symbolic_key(arr[j]): "INIT_VAL(d)"; (different symbolic
83 flags: {TOUCHED}} binding)
86 After (6), we lose the symbolic binding for "arr[j]" since we could
87 have overwritten it, and gain a concrete binding for bits 24-31
88 again, this time with svalue "INIT_VAL(e)":
89 cluster: {bits 24-31: "INIT_VAL(d)";
91 The cluster is still flagged as touched, so that we know that
92 accesses to other elements are "UNKNOWN" rather than
95 Handling symbolic regions requires us to handle aliasing.
97 In the first example above, each of a, b, c and verts are non-symbolic
98 base regions and so their clusters are "concrete clusters", whereas given:
100 then "*p" and "*q" are symbolic base regions, and thus "*p" and "*q"
101 have "symbolic clusters".
103 In the above, "verts[i].x" will have a symbolic *binding* within a
104 concrete cluster for "verts", whereas "*p" is a symbolic *cluster*.
106 Writes to concrete clusters can't affect other concrete clusters,
107 but can affect symbolic clusters; e.g. after:
109 we bind 42 in the cluster for "verts", but the clusters for "b" and "c"
110 can't be affected. Any symbolic clusters for *p and for *q can be
111 affected, *p and *q could alias verts.
113 Writes to a symbolic cluster can affect other clusters, both
114 concrete and symbolic; e.g. after:
116 we bind 17 within the cluster for "*p". The concrete clusters for a, b,
117 c, and verts could be affected, depending on whether *p aliases them.
118 Similarly, the symbolic cluster to *q could be affected. */
122 class concrete_binding
;
124 /* An enum for discriminating between "direct" vs "default" levels of
129 /* Special-case value for hash support.
130 This is the initial entry, so that hash traits can have
131 empty_zero_p = true. */
134 /* Special-case value for hash support. */
137 /* The normal kind of mapping. */
140 /* A lower-priority kind of mapping, for use when inheriting
141 default values from a parent region. */
145 extern const char *binding_kind_to_string (enum binding_kind kind
);
147 /* Abstract base class for describing ranges of bits within a binding_map
148 that can have svalues bound to them. */
153 virtual ~binding_key () {}
154 virtual bool concrete_p () const = 0;
155 bool symbolic_p () const { return !concrete_p (); }
157 static const binding_key
*make (store_manager
*mgr
, const region
*r
,
158 enum binding_kind kind
);
160 virtual void dump_to_pp (pretty_printer
*pp
, bool simple
) const;
161 void dump (bool simple
) const;
162 label_text
get_desc (bool simple
=true) const;
164 static int cmp_ptrs (const void *, const void *);
165 static int cmp (const binding_key
*, const binding_key
*);
167 virtual const concrete_binding
*dyn_cast_concrete_binding () const
170 enum binding_kind
get_kind () const { return m_kind
; }
172 void mark_deleted () { m_kind
= BK_deleted
; }
173 void mark_empty () { m_kind
= BK_empty
; }
174 bool is_deleted () const { return m_kind
== BK_deleted
; }
175 bool is_empty () const { return m_kind
== BK_empty
; }
178 binding_key (enum binding_kind kind
) : m_kind (kind
) {}
180 hashval_t
impl_hash () const
184 bool impl_eq (const binding_key
&other
) const
186 return m_kind
== other
.m_kind
;
190 enum binding_kind m_kind
;
193 /* Concrete subclass of binding_key, for describing a concrete range of
194 bits within the binding_map (e.g. "bits 8-15"). */
196 class concrete_binding
: public binding_key
199 /* This class is its own key for the purposes of consolidation. */
200 typedef concrete_binding key_t
;
202 concrete_binding (bit_offset_t start_bit_offset
, bit_size_t size_in_bits
,
203 enum binding_kind kind
)
204 : binding_key (kind
),
205 m_start_bit_offset (start_bit_offset
),
206 m_size_in_bits (size_in_bits
)
208 bool concrete_p () const FINAL OVERRIDE
{ return true; }
210 hashval_t
hash () const
212 inchash::hash hstate
;
213 hstate
.add_wide_int (m_start_bit_offset
);
214 hstate
.add_wide_int (m_size_in_bits
);
215 return hstate
.end () ^ binding_key::impl_hash ();
217 bool operator== (const concrete_binding
&other
) const
219 if (!binding_key::impl_eq (other
))
221 return (m_start_bit_offset
== other
.m_start_bit_offset
222 && m_size_in_bits
== other
.m_size_in_bits
);
225 void dump_to_pp (pretty_printer
*pp
, bool simple
) const FINAL OVERRIDE
;
227 const concrete_binding
*dyn_cast_concrete_binding () const FINAL OVERRIDE
230 bit_offset_t
get_start_bit_offset () const { return m_start_bit_offset
; }
231 bit_size_t
get_size_in_bits () const { return m_size_in_bits
; }
232 /* Return the next bit offset after the end of this binding. */
233 bit_offset_t
get_next_bit_offset () const
235 return m_start_bit_offset
+ m_size_in_bits
;
238 bool overlaps_p (const concrete_binding
&other
) const;
240 static int cmp_ptr_ptr (const void *, const void *);
243 bit_offset_t m_start_bit_offset
;
244 bit_size_t m_size_in_bits
;
249 template <> struct default_hash_traits
<ana::concrete_binding
>
250 : public member_function_hash_traits
<ana::concrete_binding
>
252 static const bool empty_zero_p
= true;
257 /* Concrete subclass of binding_key, for describing a symbolic set of
258 bits within the binding_map in terms of a region (e.g. "arr[i]"). */
260 class symbolic_binding
: public binding_key
263 /* This class is its own key for the purposes of consolidation. */
264 typedef symbolic_binding key_t
;
266 symbolic_binding (const region
*region
, enum binding_kind kind
)
267 : binding_key (kind
),
270 bool concrete_p () const FINAL OVERRIDE
{ return false; }
272 hashval_t
hash () const
274 return (binding_key::impl_hash () ^ (long)m_region
);
276 bool operator== (const symbolic_binding
&other
) const
278 if (!binding_key::impl_eq (other
))
280 return (m_region
== other
.m_region
);
283 void dump_to_pp (pretty_printer
*pp
, bool simple
) const FINAL OVERRIDE
;
285 const region
*get_region () const { return m_region
; }
287 static int cmp_ptr_ptr (const void *, const void *);
290 const region
*m_region
;
295 template <> struct default_hash_traits
<ana::symbolic_binding
>
296 : public member_function_hash_traits
<ana::symbolic_binding
>
298 static const bool empty_zero_p
= true;
303 /* A mapping from binding_keys to svalues, for use by binding_cluster
304 and compound_svalue. */
309 typedef hash_map
<const binding_key
*, const svalue
*> map_t
;
310 typedef map_t::iterator iterator_t
;
312 binding_map () : m_map () {}
313 binding_map (const binding_map
&other
);
314 binding_map
& operator=(const binding_map
&other
);
316 bool operator== (const binding_map
&other
) const;
317 bool operator!= (const binding_map
&other
) const
319 return !(*this == other
);
322 hashval_t
hash () const;
324 const svalue
*get (const binding_key
*key
) const
326 const svalue
**slot
= const_cast<map_t
&> (m_map
).get (key
);
332 bool put (const binding_key
*k
, const svalue
*v
)
335 return m_map
.put (k
, v
);
338 void remove (const binding_key
*k
) { m_map
.remove (k
); }
339 void empty () { m_map
.empty (); }
341 iterator_t
begin () const { return m_map
.begin (); }
342 iterator_t
end () const { return m_map
.end (); }
343 size_t elements () const { return m_map
.elements (); }
345 void dump_to_pp (pretty_printer
*pp
, bool simple
, bool multiline
) const;
346 void dump (bool simple
) const;
348 json::object
*to_json () const;
350 bool apply_ctor_to_region (const region
*parent_reg
, tree ctor
,
351 region_model_manager
*mgr
);
353 static int cmp (const binding_map
&map1
, const binding_map
&map2
);
356 bool apply_ctor_val_to_range (const region
*parent_reg
,
357 region_model_manager
*mgr
,
358 tree min_index
, tree max_index
,
360 bool apply_ctor_pair_to_child_region (const region
*parent_reg
,
361 region_model_manager
*mgr
,
362 tree index
, tree val
);
367 /* Concept: BindingVisitor, for use by binding_cluster::for_each_binding
368 and store::for_each_binding.
371 void on_binding (const binding_key *key, const svalue *&sval);
374 /* All of the bindings within a store for regions that share the same
377 class binding_cluster
382 typedef hash_map
<const binding_key
*, const svalue
*> map_t
;
383 typedef map_t::iterator iterator_t
;
385 binding_cluster (const region
*base_region
)
386 : m_base_region (base_region
), m_map (),
387 m_escaped (false), m_touched (false) {}
388 binding_cluster (const binding_cluster
&other
);
389 binding_cluster
& operator=(const binding_cluster
&other
);
391 bool operator== (const binding_cluster
&other
) const;
392 bool operator!= (const binding_cluster
&other
) const
394 return !(*this == other
);
397 hashval_t
hash () const;
399 bool symbolic_p () const;
401 void dump_to_pp (pretty_printer
*pp
, bool simple
, bool multiline
) const;
402 void dump (bool simple
) const;
404 json::object
*to_json () const;
406 void bind (store_manager
*mgr
, const region
*, const svalue
*,
409 void clobber_region (store_manager
*mgr
, const region
*reg
);
410 void purge_region (store_manager
*mgr
, const region
*reg
);
411 void zero_fill_region (store_manager
*mgr
, const region
*reg
);
412 void mark_region_as_unknown (store_manager
*mgr
, const region
*reg
);
414 const svalue
*get_binding (store_manager
*mgr
, const region
*reg
,
415 binding_kind kind
) const;
416 const svalue
*get_binding_recursive (store_manager
*mgr
,
418 enum binding_kind kind
) const;
419 const svalue
*get_any_binding (store_manager
*mgr
,
420 const region
*reg
) const;
421 const svalue
*maybe_get_compound_binding (store_manager
*mgr
,
422 const region
*reg
) const;
424 void remove_overlapping_bindings (store_manager
*mgr
, const region
*reg
);
426 template <typename T
>
427 void for_each_value (void (*cb
) (const svalue
*sval
, T user_data
),
430 for (map_t::iterator iter
= m_map
.begin (); iter
!= m_map
.end (); ++iter
)
431 cb ((*iter
).second
, user_data
);
434 static bool can_merge_p (const binding_cluster
*cluster_a
,
435 const binding_cluster
*cluster_b
,
436 binding_cluster
*out_cluster
,
438 model_merger
*merger
);
439 void make_unknown_relative_to (const binding_cluster
*other_cluster
,
442 void mark_as_escaped ();
443 void on_unknown_fncall (const gcall
*call
, store_manager
*mgr
);
445 bool escaped_p () const { return m_escaped
; }
446 bool touched_p () const { return m_touched
; }
448 bool redundant_p () const;
449 bool empty_p () const { return m_map
.elements () == 0; }
451 void get_representative_path_vars (const region_model
*model
,
453 const region
*base_reg
,
455 auto_vec
<path_var
> *out_pvs
) const;
457 const svalue
*maybe_get_simple_value (store_manager
*mgr
) const;
459 template <typename BindingVisitor
>
460 void for_each_binding (BindingVisitor
&v
)
462 for (map_t::iterator iter
= m_map
.begin (); iter
!= m_map
.end (); ++iter
)
464 const binding_key
*key
= (*iter
).first
;
465 const svalue
*&sval
= (*iter
).second
;
466 v
.on_binding (key
, sval
);
470 iterator_t
begin () const { return m_map
.begin (); }
471 iterator_t
end () const { return m_map
.end (); }
473 const binding_map
&get_map () const { return m_map
; }
476 const svalue
*get_any_value (const binding_key
*key
) const;
477 void get_overlapping_bindings (store_manager
*mgr
, const region
*reg
,
478 auto_vec
<const binding_key
*> *out
);
479 void bind_compound_sval (store_manager
*mgr
,
481 const compound_svalue
*compound_sval
);
482 void bind_key (const binding_key
*key
, const svalue
*sval
);
484 const region
*m_base_region
;
488 /* Has a pointer to this cluster "escaped" into a part of the program
489 we don't know about (via a call to a function with an unknown body,
490 or by being passed in as a pointer param of a "top-level" function call).
491 Such regions could be overwritten when other such functions are called,
492 even if the region is no longer reachable by pointers that we are
496 /* Has this cluster been written to via a symbolic binding?
497 If so, then we don't know anything about unbound subregions,
498 so we can't use initial_svalue, treat them as uninitialized, or
499 inherit values from a parent region. */
503 /* The mapping from regions to svalues.
504 This is actually expressed by subdividing into clusters, to better
510 typedef hash_map
<const region
*, binding_cluster
*> cluster_map_t
;
513 store (const store
&other
);
516 store
&operator= (const store
&other
);
518 bool operator== (const store
&other
) const;
519 bool operator!= (const store
&other
) const
521 return !(*this == other
);
524 hashval_t
hash () const;
526 void dump_to_pp (pretty_printer
*pp
, bool summarize
, bool multiline
,
527 store_manager
*mgr
) const;
528 void dump (bool simple
) const;
529 void summarize_to_pp (pretty_printer
*pp
, bool simple
) const;
531 json::object
*to_json () const;
533 const svalue
*get_direct_binding (store_manager
*mgr
, const region
*reg
);
534 const svalue
*get_default_binding (store_manager
*mgr
, const region
*reg
);
535 const svalue
*get_any_binding (store_manager
*mgr
, const region
*reg
) const;
537 bool called_unknown_fn_p () const { return m_called_unknown_fn
; }
539 void set_value (store_manager
*mgr
, const region
*lhs_reg
,
540 const svalue
*rhs_sval
, enum binding_kind kind
);
541 void clobber_region (store_manager
*mgr
, const region
*reg
);
542 void purge_region (store_manager
*mgr
, const region
*reg
);
543 void zero_fill_region (store_manager
*mgr
, const region
*reg
);
544 void mark_region_as_unknown (store_manager
*mgr
, const region
*reg
);
546 const binding_cluster
*get_cluster (const region
*base_reg
) const;
547 binding_cluster
*get_cluster (const region
*base_reg
);
548 binding_cluster
*get_or_create_cluster (const region
*base_reg
);
549 void purge_cluster (const region
*base_reg
);
551 template <typename T
>
552 void for_each_cluster (void (*cb
) (const region
*base_reg
, T user_data
),
555 for (cluster_map_t::iterator iter
= m_cluster_map
.begin ();
556 iter
!= m_cluster_map
.end (); ++iter
)
557 cb ((*iter
).first
, user_data
);
560 static bool can_merge_p (const store
*store_a
, const store
*store_b
,
561 store
*out_store
, store_manager
*mgr
,
562 model_merger
*merger
);
564 void mark_as_escaped (const region
*base_reg
);
565 void on_unknown_fncall (const gcall
*call
, store_manager
*mgr
);
566 bool escaped_p (const region
*reg
) const;
568 void get_representative_path_vars (const region_model
*model
,
571 auto_vec
<path_var
> *out_pvs
) const;
573 cluster_map_t::iterator
begin () const { return m_cluster_map
.begin (); }
574 cluster_map_t::iterator
end () const { return m_cluster_map
.end (); }
576 tristate
eval_alias (const region
*base_reg_a
,
577 const region
*base_reg_b
) const;
579 template <typename BindingVisitor
>
580 void for_each_binding (BindingVisitor
&v
)
582 for (cluster_map_t::iterator iter
= m_cluster_map
.begin ();
583 iter
!= m_cluster_map
.end (); ++iter
)
584 (*iter
).second
->for_each_binding (v
);
587 void canonicalize (store_manager
*mgr
);
588 void loop_replay_fixup (const store
*other_store
,
589 region_model_manager
*mgr
);
592 void remove_overlapping_bindings (store_manager
*mgr
, const region
*reg
);
593 tristate
eval_alias_1 (const region
*base_reg_a
,
594 const region
*base_reg_b
) const;
596 cluster_map_t m_cluster_map
;
598 /* If this is true, then unknown code has been called, and so
599 any global variable that isn't currently modelled by the store
600 has unknown state, rather than being in an "initial state".
601 This is to avoid having to mark (and thus explicitly track)
602 every global when an unknown function is called; instead, they
603 can be tracked implicitly. */
604 bool m_called_unknown_fn
;
607 /* A class responsible for owning and consolidating binding keys
608 (both concrete and symbolic).
609 Key instances are immutable as far as clients are concerned, so they
610 are provided as "const" ptrs. */
615 store_manager (region_model_manager
*mgr
) : m_mgr (mgr
) {}
617 /* binding consolidation. */
618 const concrete_binding
*
619 get_concrete_binding (bit_offset_t start_bit_offset
,
620 bit_offset_t size_in_bits
,
621 enum binding_kind kind
);
622 const symbolic_binding
*
623 get_symbolic_binding (const region
*region
,
624 enum binding_kind kind
);
626 region_model_manager
*get_svalue_manager () const
631 void log_stats (logger
*logger
, bool show_objs
) const;
634 region_model_manager
*m_mgr
;
635 consolidation_map
<concrete_binding
> m_concrete_binding_key_mgr
;
636 consolidation_map
<symbolic_binding
> m_symbolic_binding_key_mgr
;
641 #endif /* GCC_ANALYZER_STORE_H */