+2009-08-25 Roland McGrath <roland@redhat.com>
+
+ * c++/dwarf_tracker: Soup up circular reference handling to handle
+ mismatched duplication vs sharing in semantically matching trees.
+ * c++/dwarf_comparator: Update user.
+
2009-08-21 Roland McGrath <roland@redhat.com>
* c++/subr.hh (hash<std::string>): Use elf_gnu_hash algorithm.
return false;
}
- struct reference_match
+ struct reference_match {};
+
+ // This call is used purely in hopes of a cache hit.
+ inline bool prematch (reference_match &, const die1 &, const die2 &)
{
- inline bool cannot_match () const
- {
- return false;
- }
- inline void notice_match (const die2 &, bool) const
- {
- }
- };
+ return false;
+ }
+ // This call is used only as part of a real reference lookup.
inline bool reference_matched (reference_match &,
const die1 &, const die2 &)
{
return false;
}
+ // Check for a negative cache hit after prematch or reference_match.
+ inline bool cannot_match (reference_match &, const die1 &, const die2 &)
+ {
+ return false;
+ }
+
+ // This can cache a result.
+ inline bool notice_match (reference_match &, const die1 &, const die2 &,
+ bool result)
+ {
+ return result;
+ }
+
inline dwarf_tracker_base ()
{}
inline bool match (const die1 &a, const die2 &b)
{
_m_tracker.visit (a, b);
- return (a.tag () == b.tag ()
- && match (a.attributes (), b.attributes ())
- && match (a.children (), b.children ()));
+ if (a.tag () != b.tag ())
+ return nomatch (a, b, "DIE tag");
+ if (!match (a.attributes (), b.attributes ()))
+ return nomatch (a, b, "DIE attrs");
+ if (! match (a.children (), b.children ()))
+ return nomatch (a, b, "DIE children");
+ return true;
}
template<typename in, typename out>
bool>
{
dwarf_comparator &_m_cmp;
- match_sorted (dwarf_comparator<dwarf1, dwarf2,
- ignore_refs, tracker> &cmp)
+ match_sorted (dwarf_comparator<dwarf1, dwarf2, ignore_refs, tracker> &cmp)
: _m_cmp (cmp)
{}
populate (sorted2, b);
std::pair<typename ait1_map::iterator,
- typename ait2_map::iterator> result
+ typename ait2_map::iterator> result
= std::mismatch (sorted1.begin (), sorted1.end (),
sorted2.begin (), match_sorted (*this));
if (result.first == sorted1.end ()
{
// Maybe the tracker has already cached a correspondence of DIEs.
typename tracker::reference_match matched;
- if (_m_tracker.reference_matched (matched, a, b))
+ if (_m_tracker.prematch (matched, a, b))
return true;
- if (matched.cannot_match ())
- return false;
+ if (_m_tracker.cannot_match (matched, a, b))
+ return nomatch (*a, *b, "children cached");
bool result = match_child (a, b);
// Let the tracker cache a result for its reference_matched.
- matched.notice_match (b, result);
-
- return result;
+ return _m_tracker.notice_match (matched, a, b, result);
}
inline bool match (const children1 &a, const children2 &b)
return false;
}
+ // This is a convenient place to hack for debugging output and such.
+ inline bool nomatch (const die1 &, const die2 &, const char *)
+ {
+ return false;
+ }
+
/* We call references equal if they are literally the same DIE,
or if they are identical subtrees sitting in matching contexts.
The tracker's context_match method decides what that means. */
// Simplest mismatches with the cheapest checks first.
if (a.tag () != b.tag ())
- return false;
+ return nomatch (a, b, "tag");
const bool has_children = a.has_children ();
if (has_children != b.has_children ())
- return false;
+ return nomatch (a, b, "has_children");
// Maybe the tracker has already cached a correspondence of references.
typename tracker::reference_match matched;
if (_m_tracker.reference_matched (matched, ref1, ref2))
return true;
- if (matched.cannot_match ())
- return false;
+ if (_m_tracker.cannot_match (matched, ref1, ref2))
+ return nomatch (a, b, "cached");
// Now we really have to get the tracker involved.
const typename tracker::left_context_type &lhs
/* First do the cheap mismatch check on the contexts, then check the
contents and contexts in ascending order of costliness of a check. */
- if (_m_tracker.context_quick_mismatch (lhs, rhs)
- || !match (a.attributes (), b.attributes ())
- || !_m_tracker.context_match (lhs, rhs))
- return false;
+ if (_m_tracker.context_quick_mismatch (lhs, rhs))
+ return nomatch (a, b, "quick context");
+ if (!match (a.attributes (), b.attributes ()))
+ return nomatch (a, b, "attribute");
+ if (!_m_tracker.context_match (lhs, rhs))
+ return nomatch (a, b, "context");
/* To compare the children, we have to clone the tracker and use a
new one, in case of any reference attributes in their subtrees.
{
typename tracker::subtracker t (_m_tracker, matched, lhs, rhs);
result = subcomparator (t).equals (a.children (), b.children ());
+ if (!result)
+ result = nomatch (a, b, "children");
}
// Let the tracker cache a result for its reference_matched.
- matched.notice_match (ref2, result);
-
- return result;
+ return _m_tracker.notice_match (matched, ref1, ref2, result);
}
public:
typedef typename _base::die1 die1;
typedef typename _base::die2 die2;
typedef typename _base::dwarf1_ref dwarf1_ref;
+ class reference_match;
protected:
typedef dwarf_path_finder<dwarf1> tracker1;
}
};
+ typedef std::tr1::unordered_map<dwarf::debug_info_entry::identity_type,
+ reference_match *> active_map;
+ active_map _m_active;
+
typedef std::pair<const die2 *,
std::tr1::unordered_set<die2, ref_hasher, same_ref>
> equiv_list;
return &(*_m_equiv)[a->identity ()];
}
+ typedef dwarf_tracker_base<dwarf1, dwarf2> context_tracker;
+ typedef dwarf_comparator<dwarf1, dwarf2, true,
+ context_tracker> context_comparator;
+
/* Predicate for DIEs "equal enough" to match as context for a subtree.
The definition we use is that the DIE has the same tag and all its
attributes are equal, excepting that references in attribute values
{
if (a->tag () != b->tag ())
return false;
- dwarf_tracker_base<dwarf1, dwarf2> t;
- return (dwarf_comparator<dwarf1, dwarf2, true> (t)
- .equals (a->attributes (), b->attributes ()));
+ context_tracker t;
+ return context_comparator (t).equals (a->attributes (),
+ b->attributes ());
}
};
class reference_match
{
friend class dwarf_ref_tracker;
- private:
- equiv_list *_m_elt;
+ protected:
+ equiv_list *_m_lhs;
+ typename active_map::value_type *_m_rhs;
+ active_map *_m_active;
public:
inline reference_match ()
- : _m_elt (NULL)
+ : _m_lhs (NULL), _m_rhs (NULL), _m_active (NULL)
{}
inline ~reference_match ()
{
- if (_m_elt != NULL)
- _m_elt->first = NULL;
- }
-
- inline bool cannot_match () const
- {
- return _m_elt == NULL;
- }
-
- inline void notice_match (const die2 &b, bool matches) const
- {
- if (matches && _m_elt != NULL)
- _m_elt->second.insert (b);
+ if (_m_lhs != NULL)
+ _m_lhs->first = NULL;
+ if (_m_rhs != NULL)
+ _m_active->erase (_m_rhs->first);
}
};
+ /* A prematch during the main tree walk does the same cache lookup
+ as real reference matching. But it doesn't record itself as a
+ "walk in progress" for the circularity-catching logic. Doing so
+ can break that logic for comparison purposes. Since we key our
+ cache on identity, a lookup can hit a shared DIE as well as one
+ that is truly involved in our current walk. If we hit a shared
+ DIE on the main walk, and within that recursion (i.e. somewhere
+ in its children or along its own references) we encounter a
+ circularity, we'd take the main-walk's equiv_list record as the
+ root of the circularity on one side, while on the other side the
+ DIEs may not have been shared and so the same circularity is
+ actually rooted at the second instance of an identical DIE. */
+ inline bool prematch (reference_match &matched,
+ const die1 &a, const die2 &b)
+ {
+ return reference_matched (matched, a, b, false);
+ }
+
inline bool
- reference_matched (reference_match &matched, const die1 &a, const die2 &b)
+ reference_matched (reference_match &matched, const die1 &a, const die2 &b,
+ bool record = true)
{
equiv_list *elt = equiv_to (a);
if (elt->first == NULL)
{
- /* Record that we have a walk in progress crossing A.
- When MATCHED goes out of scope in our caller, its
- destructor will reset ELT->first to clear this record. */
- elt->first = &b;
- matched._m_elt = elt;
+ matched._m_lhs = elt;
+
+ if (record)
+ /* Record that we have a walk in progress crossing A.
+ When MATCHED goes out of scope in our caller, its
+ destructor will reset ELT->first to clear this record. */
+ elt->first = &b;
// Short-circuit if we have already matched B to A.
return elt->second.find (b) != elt->second.end ();
}
- /* We have a circularity. We can tell because ELT->first remains
- set from an outer recursion still in progress.
+ /* We have a circularity on the left-hand side. We can tell because
+ ELT->first remains set from an outer recursion still in progress.
The circular chain of references rooted at A matches B if B is
also the root of its own circularity and everything along those
We actually record the pointer on the caller's stack rather
than a copy of B, just because the iterator might be larger. */
- return *elt->first == b;
+ if ((*elt->first)->identity () == b->identity ())
+ return true;
+
+ /* Our right-hand side is not in lock-step on a matching circularity.
+ But it's still possible this is a matching reference nonetheless.
+ A difference in the sharing vs duplication of DIEs between the
+ left-hand and right-hand sides could mean that one side's chain of
+ references reaches the same cycle sooner than the other's.
+
+ Consider:
+
+ A1 -> A2 -> ... -> A1' -> A2 ...
+ B1 -> B2 -> ... -> B1 -> B2 ...
+
+ Here A1' is an identical copy of A1, duplicated in the A object.
+ Otherwise A1 matches B1, A2 matches B2, etc. The B walk started
+ at B1 and hits it again at the step comparing B1 to A1'. But the
+ A walk has not hit A1 again yet (and perhaps it never will), so
+ our test above does not match.
+
+ This is the simplest example. There might be more steps of the
+ reference chain that have duplicates on one side but have been
+ consolidated to a single entry on the other. There can also be
+ multiple reference attributes at each node that differ on this
+ issue, making all manner of tangled graphs that all really match
+ the same simpler graph (and thus each other).
+
+ Now we start recording the state of the right-hand side reference
+ chain walk, and keep going. When the right-hand side then becomes
+ circular, we check that it has coincided with the left-hand side.
+
+ This is guaranteed to terminate, at least. It should never have
+ any false positives, since that continuing walk would eventually
+ find the differences. We hope it doesn't have any false negatives
+ either, but to be sure of that would require more graph theory
+ than your humble writer can bring to bear. */
+
+ const std::pair<typename active_map::iterator, bool> p
+ = _m_active.insert (std::make_pair (b->identity (), &matched));
+ if (p.second)
+ {
+ assert (p.first->second == &matched);
+ matched._m_lhs = elt;
+ matched._m_active = &_m_active;
+ matched._m_rhs = &*p.first;
+ return false;
+ }
+
+ assert (p.first->second != &matched);
+ return p.first->second->_m_lhs == elt;
+ }
+
+ inline bool cannot_match (reference_match &matched,
+ const die1 &, const die2 &)
+ {
+ return matched._m_lhs == NULL && matched._m_rhs == NULL;
+ }
+
+ inline bool notice_match (reference_match &matched,
+ const die1 &, const die2 &b, bool matches)
+ {
+ if (matches && matched._m_lhs != NULL)
+ matched._m_lhs->second.insert (b);
+ return matches;
}
typedef dwarf_ref_tracker subtracker;
_m_right (proto._m_right, rhs),
_m_equiv (proto._m_equiv), _m_delete_equiv (false)
{
- // We are starting a recursive consideration of a vs b.
+ // We are starting a recursive consideration of LHS vs RHS.
}
};
};
+2009-08-25 Roland McGrath <roland@redhat.com>
+
+ * dwarfcmp.cc (talker): Track real vs fake-positive match result
+ and cache only real results in the real tracker.
+
2009-08-20 Roland McGrath <roland@redhat.com>
* dwarfcmp.cc (verbose): New variable.
// XXX make translation-friendly
-template<class dwarf1, class dwarf2>
+template<class dwarf1, class dwarf2, bool print_all>
struct talker : public dwarf_ref_tracker<dwarf1, dwarf2>
{
typedef dwarf_tracker_base<dwarf1, dwarf2> _base;
const typename dwarf1::debug_info_entry *a_;
const typename dwarf2::debug_info_entry *b_;
bool result_;
+ bool visiting_result_;
inline talker ()
: a_ (NULL), b_ (NULL), result_ (true)
{
}
- inline bool keep_going ()
- {
- result_ = false;
- return verbose;
- }
-
inline ostream &location () const
{
return cout << hex << a_->offset () << " vs " << b_->offset () << ": ";
{
a_ = &a;
b_ = &b;
- if (a.tag () != b.tag ())
+ visiting_result_ = a.tag () == b.tag ();
+ if (!visiting_result_)
location () << dwarf::tags::name (a.tag ())
<< " vs "
<< dwarf::tags::name (b.tag ());
}
+ inline bool keep_going ()
+ {
+ result_ = visiting_result_ = false;
+ return print_all;
+ }
+
+ inline bool notice_match (typename subtracker::reference_match &matched,
+ const die1 &a, const die2 &b, bool matches)
+ {
+ /* In the main walk, a mismatch would have gone to keep_going.
+ But in reference_match, the comparator uses a subtracker.
+ It won't have set visiting_result_, but it will return a
+ real non-matching result we can catch here. */
+ if (!matches)
+ visiting_result_ = false;
+
+ // Record the real result in the cache, not a fake match for -l.
+ subtracker::notice_match (matched, a, b, visiting_result_);
+
+ return matches || keep_going ();
+ }
+
inline bool mismatch (cu1 &it1, const cu1 &end1,
cu2 &it2, const cu2 &end2)
{
inline void reference_mismatch (const die1 &ref1, const die2 &ref2)
{
subcomparator cmp (*(subtracker *) this);
- if (cmp.equals (ref1, ref2))
- cout << " (XXX refs now equal again!)";
+ if (cmp.equals (ref1, ref2) && !print_all)
+ cout << " (XXX refs now equal again!)"
+ << (cmp.equals (*ref1, *ref2) ? "" : " (and not identical!!)");
else if (cmp.equals (*ref1, *ref2))
cout << " (identical but contexts mismatch)";
else
: public cmp<dwarf1, dwarf2, dwarf_ref_tracker<dwarf1, dwarf2> >
{};
-// To be noisy, the talker wraps the standard tracker with verbosity hooks.
template<class dwarf1, class dwarf2>
+static inline bool
+quiet_compare (const dwarf1 &a, const dwarf2 &b)
+{
+ return quiet_cmp<dwarf1, dwarf2> () (a, b);
+}
+
+// To be noisy, the talker wraps the standard tracker with verbosity hooks.
+template<class dwarf1, class dwarf2, bool print_all>
struct noisy_cmp
- : public cmp<dwarf1, dwarf2, talker<dwarf1, dwarf2> >
+ : public cmp<dwarf1, dwarf2, talker<dwarf1, dwarf2, print_all> >
{
inline bool operator () (const dwarf1 &a, const dwarf2 &b)
{
};
+template<class dwarf1, class dwarf2, bool print_all>
+static inline bool
+noisy_compare (const dwarf1 &a, const dwarf2 &b)
+{
+ return noisy_cmp<dwarf1, dwarf2, print_all> () (a, b);
+}
+
+template<class dwarf1, class dwarf2>
+static inline bool
+noisy_compare (const dwarf1 &a, const dwarf2 &b, bool print_all)
+{
+ return (print_all
+ ? noisy_cmp<dwarf1, dwarf2, true> () (a, b)
+ : noisy_cmp<dwarf1, dwarf2, false> () (a, b));
+}
+
+
// Test that one comparison works as expected.
template<class dwarf1, class dwarf2>
static void
if (quiet_cmp<dwarf1, dwarf2> () (file1, file2) != expect)
{
if (expect)
- noisy_cmp<dwarf1, dwarf2> () (file1, file2);
+ noisy_compare (file1, file2, true);
throw std::logic_error (__PRETTY_FUNCTION__);
}
}
dwarf file2 (dw2);
bool same = (quiet
- ? quiet_cmp<dwarf, dwarf> () (file1, file2)
- : noisy_cmp<dwarf, dwarf> () (file1, file2));
+ ? quiet_compare (file1, file2)
+ : noisy_compare (file1, file2, verbose));
if (test_writer & 1)
test_output (file1, file2, false, file1, file2, same);