From: Roland McGrath Date: Wed, 26 Aug 2009 04:14:38 +0000 (-0700) Subject: Fix tracker circular ref handling for sharing differences. dwarfcmp -l improvements. X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=53c2f702277f4c50e663be7a110973779eebe080;p=thirdparty%2Felfutils.git Fix tracker circular ref handling for sharing differences. dwarfcmp -l improvements. --- diff --git a/libdw/ChangeLog b/libdw/ChangeLog index 090e3be68..308fda3ef 100644 --- a/libdw/ChangeLog +++ b/libdw/ChangeLog @@ -1,3 +1,9 @@ +2009-08-25 Roland McGrath + + * c++/dwarf_tracker: Soup up circular reference handling to handle + mismatched duplication vs sharing in semantically matching trees. + * c++/dwarf_comparator: Update user. + 2009-08-21 Roland McGrath * c++/subr.hh (hash): Use elf_gnu_hash algorithm. diff --git a/libdw/c++/dwarf_comparator b/libdw/c++/dwarf_comparator index 3e1366391..e34677517 100644 --- a/libdw/c++/dwarf_comparator +++ b/libdw/c++/dwarf_comparator @@ -142,23 +142,34 @@ namespace elfutils return false; } - struct reference_match + struct reference_match {}; + + // This call is used purely in hopes of a cache hit. + inline bool prematch (reference_match &, const die1 &, const die2 &) { - inline bool cannot_match () const - { - return false; - } - inline void notice_match (const die2 &, bool) const - { - } - }; + return false; + } + // This call is used only as part of a real reference lookup. inline bool reference_matched (reference_match &, const die1 &, const die2 &) { return false; } + // Check for a negative cache hit after prematch or reference_match. + inline bool cannot_match (reference_match &, const die1 &, const die2 &) + { + return false; + } + + // This can cache a result. + inline bool notice_match (reference_match &, const die1 &, const die2 &, + bool result) + { + return result; + } + inline dwarf_tracker_base () {} @@ -235,9 +246,13 @@ namespace elfutils inline bool match (const die1 &a, const die2 &b) { _m_tracker.visit (a, b); - return (a.tag () == b.tag () - && match (a.attributes (), b.attributes ()) - && match (a.children (), b.children ())); + if (a.tag () != b.tag ()) + return nomatch (a, b, "DIE tag"); + if (!match (a.attributes (), b.attributes ())) + return nomatch (a, b, "DIE attrs"); + if (! match (a.children (), b.children ())) + return nomatch (a, b, "DIE children"); + return true; } template @@ -285,8 +300,7 @@ namespace elfutils bool> { dwarf_comparator &_m_cmp; - match_sorted (dwarf_comparator &cmp) + match_sorted (dwarf_comparator &cmp) : _m_cmp (cmp) {} @@ -332,7 +346,7 @@ namespace elfutils populate (sorted2, b); std::pair result + typename ait2_map::iterator> result = std::mismatch (sorted1.begin (), sorted1.end (), sorted2.begin (), match_sorted (*this)); if (result.first == sorted1.end () @@ -361,18 +375,16 @@ namespace elfutils { // Maybe the tracker has already cached a correspondence of DIEs. typename tracker::reference_match matched; - if (_m_tracker.reference_matched (matched, a, b)) + if (_m_tracker.prematch (matched, a, b)) return true; - if (matched.cannot_match ()) - return false; + if (_m_tracker.cannot_match (matched, a, b)) + return nomatch (*a, *b, "children cached"); bool result = match_child (a, b); // Let the tracker cache a result for its reference_matched. - matched.notice_match (b, result); - - return result; + return _m_tracker.notice_match (matched, a, b, result); } inline bool match (const children1 &a, const children2 &b) @@ -458,6 +470,12 @@ namespace elfutils return false; } + // This is a convenient place to hack for debugging output and such. + inline bool nomatch (const die1 &, const die2 &, const char *) + { + return false; + } + /* We call references equal if they are literally the same DIE, or if they are identical subtrees sitting in matching contexts. The tracker's context_match method decides what that means. */ @@ -474,19 +492,19 @@ namespace elfutils // Simplest mismatches with the cheapest checks first. if (a.tag () != b.tag ()) - return false; + return nomatch (a, b, "tag"); const bool has_children = a.has_children (); if (has_children != b.has_children ()) - return false; + return nomatch (a, b, "has_children"); // Maybe the tracker has already cached a correspondence of references. typename tracker::reference_match matched; if (_m_tracker.reference_matched (matched, ref1, ref2)) return true; - if (matched.cannot_match ()) - return false; + if (_m_tracker.cannot_match (matched, ref1, ref2)) + return nomatch (a, b, "cached"); // Now we really have to get the tracker involved. const typename tracker::left_context_type &lhs @@ -496,10 +514,12 @@ namespace elfutils /* First do the cheap mismatch check on the contexts, then check the contents and contexts in ascending order of costliness of a check. */ - if (_m_tracker.context_quick_mismatch (lhs, rhs) - || !match (a.attributes (), b.attributes ()) - || !_m_tracker.context_match (lhs, rhs)) - return false; + if (_m_tracker.context_quick_mismatch (lhs, rhs)) + return nomatch (a, b, "quick context"); + if (!match (a.attributes (), b.attributes ())) + return nomatch (a, b, "attribute"); + if (!_m_tracker.context_match (lhs, rhs)) + return nomatch (a, b, "context"); /* To compare the children, we have to clone the tracker and use a new one, in case of any reference attributes in their subtrees. @@ -510,12 +530,12 @@ namespace elfutils { typename tracker::subtracker t (_m_tracker, matched, lhs, rhs); result = subcomparator (t).equals (a.children (), b.children ()); + if (!result) + result = nomatch (a, b, "children"); } // Let the tracker cache a result for its reference_matched. - matched.notice_match (ref2, result); - - return result; + return _m_tracker.notice_match (matched, ref1, ref2, result); } public: diff --git a/libdw/c++/dwarf_tracker b/libdw/c++/dwarf_tracker index acff400cd..6f6351c36 100644 --- a/libdw/c++/dwarf_tracker +++ b/libdw/c++/dwarf_tracker @@ -382,6 +382,7 @@ namespace elfutils typedef typename _base::die1 die1; typedef typename _base::die2 die2; typedef typename _base::dwarf1_ref dwarf1_ref; + class reference_match; protected: typedef dwarf_path_finder tracker1; @@ -406,6 +407,10 @@ namespace elfutils } }; + typedef std::tr1::unordered_map active_map; + active_map _m_active; + typedef std::pair > equiv_list; @@ -419,6 +424,10 @@ namespace elfutils return &(*_m_equiv)[a->identity ()]; } + typedef dwarf_tracker_base context_tracker; + typedef dwarf_comparator context_comparator; + /* Predicate for DIEs "equal enough" to match as context for a subtree. The definition we use is that the DIE has the same tag and all its attributes are equal, excepting that references in attribute values @@ -429,9 +438,9 @@ namespace elfutils { if (a->tag () != b->tag ()) return false; - dwarf_tracker_base t; - return (dwarf_comparator (t) - .equals (a->attributes (), b->attributes ())); + context_tracker t; + return context_comparator (t).equals (a->attributes (), + b->attributes ()); } }; @@ -509,51 +518,65 @@ namespace elfutils class reference_match { friend class dwarf_ref_tracker; - private: - equiv_list *_m_elt; + protected: + equiv_list *_m_lhs; + typename active_map::value_type *_m_rhs; + active_map *_m_active; public: inline reference_match () - : _m_elt (NULL) + : _m_lhs (NULL), _m_rhs (NULL), _m_active (NULL) {} inline ~reference_match () { - if (_m_elt != NULL) - _m_elt->first = NULL; - } - - inline bool cannot_match () const - { - return _m_elt == NULL; - } - - inline void notice_match (const die2 &b, bool matches) const - { - if (matches && _m_elt != NULL) - _m_elt->second.insert (b); + if (_m_lhs != NULL) + _m_lhs->first = NULL; + if (_m_rhs != NULL) + _m_active->erase (_m_rhs->first); } }; + /* A prematch during the main tree walk does the same cache lookup + as real reference matching. But it doesn't record itself as a + "walk in progress" for the circularity-catching logic. Doing so + can break that logic for comparison purposes. Since we key our + cache on identity, a lookup can hit a shared DIE as well as one + that is truly involved in our current walk. If we hit a shared + DIE on the main walk, and within that recursion (i.e. somewhere + in its children or along its own references) we encounter a + circularity, we'd take the main-walk's equiv_list record as the + root of the circularity on one side, while on the other side the + DIEs may not have been shared and so the same circularity is + actually rooted at the second instance of an identical DIE. */ + inline bool prematch (reference_match &matched, + const die1 &a, const die2 &b) + { + return reference_matched (matched, a, b, false); + } + inline bool - reference_matched (reference_match &matched, const die1 &a, const die2 &b) + reference_matched (reference_match &matched, const die1 &a, const die2 &b, + bool record = true) { equiv_list *elt = equiv_to (a); if (elt->first == NULL) { - /* Record that we have a walk in progress crossing A. - When MATCHED goes out of scope in our caller, its - destructor will reset ELT->first to clear this record. */ - elt->first = &b; - matched._m_elt = elt; + matched._m_lhs = elt; + + if (record) + /* Record that we have a walk in progress crossing A. + When MATCHED goes out of scope in our caller, its + destructor will reset ELT->first to clear this record. */ + elt->first = &b; // Short-circuit if we have already matched B to A. return elt->second.find (b) != elt->second.end (); } - /* We have a circularity. We can tell because ELT->first remains - set from an outer recursion still in progress. + /* We have a circularity on the left-hand side. We can tell because + ELT->first remains set from an outer recursion still in progress. The circular chain of references rooted at A matches B if B is also the root of its own circularity and everything along those @@ -564,7 +587,70 @@ namespace elfutils We actually record the pointer on the caller's stack rather than a copy of B, just because the iterator might be larger. */ - return *elt->first == b; + if ((*elt->first)->identity () == b->identity ()) + return true; + + /* Our right-hand side is not in lock-step on a matching circularity. + But it's still possible this is a matching reference nonetheless. + A difference in the sharing vs duplication of DIEs between the + left-hand and right-hand sides could mean that one side's chain of + references reaches the same cycle sooner than the other's. + + Consider: + + A1 -> A2 -> ... -> A1' -> A2 ... + B1 -> B2 -> ... -> B1 -> B2 ... + + Here A1' is an identical copy of A1, duplicated in the A object. + Otherwise A1 matches B1, A2 matches B2, etc. The B walk started + at B1 and hits it again at the step comparing B1 to A1'. But the + A walk has not hit A1 again yet (and perhaps it never will), so + our test above does not match. + + This is the simplest example. There might be more steps of the + reference chain that have duplicates on one side but have been + consolidated to a single entry on the other. There can also be + multiple reference attributes at each node that differ on this + issue, making all manner of tangled graphs that all really match + the same simpler graph (and thus each other). + + Now we start recording the state of the right-hand side reference + chain walk, and keep going. When the right-hand side then becomes + circular, we check that it has coincided with the left-hand side. + + This is guaranteed to terminate, at least. It should never have + any false positives, since that continuing walk would eventually + find the differences. We hope it doesn't have any false negatives + either, but to be sure of that would require more graph theory + than your humble writer can bring to bear. */ + + const std::pair p + = _m_active.insert (std::make_pair (b->identity (), &matched)); + if (p.second) + { + assert (p.first->second == &matched); + matched._m_lhs = elt; + matched._m_active = &_m_active; + matched._m_rhs = &*p.first; + return false; + } + + assert (p.first->second != &matched); + return p.first->second->_m_lhs == elt; + } + + inline bool cannot_match (reference_match &matched, + const die1 &, const die2 &) + { + return matched._m_lhs == NULL && matched._m_rhs == NULL; + } + + inline bool notice_match (reference_match &matched, + const die1 &, const die2 &b, bool matches) + { + if (matches && matched._m_lhs != NULL) + matched._m_lhs->second.insert (b); + return matches; } typedef dwarf_ref_tracker subtracker; @@ -578,7 +664,7 @@ namespace elfutils _m_right (proto._m_right, rhs), _m_equiv (proto._m_equiv), _m_delete_equiv (false) { - // We are starting a recursive consideration of a vs b. + // We are starting a recursive consideration of LHS vs RHS. } }; }; diff --git a/src/ChangeLog b/src/ChangeLog index dd71bf43e..c10f67b6c 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2009-08-25 Roland McGrath + + * dwarfcmp.cc (talker): Track real vs fake-positive match result + and cache only real results in the real tracker. + 2009-08-20 Roland McGrath * dwarfcmp.cc (verbose): New variable. diff --git a/src/dwarfcmp.cc b/src/dwarfcmp.cc index 6e51e2d1a..5ef182e9d 100644 --- a/src/dwarfcmp.cc +++ b/src/dwarfcmp.cc @@ -130,7 +130,7 @@ open_file (const char *fname, int *fdp) // XXX make translation-friendly -template +template struct talker : public dwarf_ref_tracker { typedef dwarf_tracker_base _base; @@ -145,6 +145,7 @@ struct talker : public dwarf_ref_tracker const typename dwarf1::debug_info_entry *a_; const typename dwarf2::debug_info_entry *b_; bool result_; + bool visiting_result_; inline talker () : a_ (NULL), b_ (NULL), result_ (true) @@ -158,12 +159,6 @@ struct talker : public dwarf_ref_tracker { } - inline bool keep_going () - { - result_ = false; - return verbose; - } - inline ostream &location () const { return cout << hex << a_->offset () << " vs " << b_->offset () << ": "; @@ -174,12 +169,35 @@ struct talker : public dwarf_ref_tracker { a_ = &a; b_ = &b; - if (a.tag () != b.tag ()) + visiting_result_ = a.tag () == b.tag (); + if (!visiting_result_) location () << dwarf::tags::name (a.tag ()) << " vs " << dwarf::tags::name (b.tag ()); } + inline bool keep_going () + { + result_ = visiting_result_ = false; + return print_all; + } + + inline bool notice_match (typename subtracker::reference_match &matched, + const die1 &a, const die2 &b, bool matches) + { + /* In the main walk, a mismatch would have gone to keep_going. + But in reference_match, the comparator uses a subtracker. + It won't have set visiting_result_, but it will return a + real non-matching result we can catch here. */ + if (!matches) + visiting_result_ = false; + + // Record the real result in the cache, not a fake match for -l. + subtracker::notice_match (matched, a, b, visiting_result_); + + return matches || keep_going (); + } + inline bool mismatch (cu1 &it1, const cu1 &end1, cu2 &it2, const cu2 &end2) { @@ -259,8 +277,9 @@ struct talker : public dwarf_ref_tracker inline void reference_mismatch (const die1 &ref1, const die2 &ref2) { subcomparator cmp (*(subtracker *) this); - if (cmp.equals (ref1, ref2)) - cout << " (XXX refs now equal again!)"; + if (cmp.equals (ref1, ref2) && !print_all) + cout << " (XXX refs now equal again!)" + << (cmp.equals (*ref1, *ref2) ? "" : " (and not identical!!)"); else if (cmp.equals (*ref1, *ref2)) cout << " (identical but contexts mismatch)"; else @@ -293,10 +312,17 @@ struct quiet_cmp : public cmp > {}; -// To be noisy, the talker wraps the standard tracker with verbosity hooks. template +static inline bool +quiet_compare (const dwarf1 &a, const dwarf2 &b) +{ + return quiet_cmp () (a, b); +} + +// To be noisy, the talker wraps the standard tracker with verbosity hooks. +template struct noisy_cmp - : public cmp > + : public cmp > { inline bool operator () (const dwarf1 &a, const dwarf2 &b) { @@ -305,6 +331,23 @@ struct noisy_cmp }; +template +static inline bool +noisy_compare (const dwarf1 &a, const dwarf2 &b) +{ + return noisy_cmp () (a, b); +} + +template +static inline bool +noisy_compare (const dwarf1 &a, const dwarf2 &b, bool print_all) +{ + return (print_all + ? noisy_cmp () (a, b) + : noisy_cmp () (a, b)); +} + + // Test that one comparison works as expected. template static void @@ -313,7 +356,7 @@ test_compare (const dwarf1 &file1, const dwarf2 &file2, bool expect) if (quiet_cmp () (file1, file2) != expect) { if (expect) - noisy_cmp () (file1, file2); + noisy_compare (file1, file2, true); throw std::logic_error (__PRETTY_FUNCTION__); } } @@ -416,8 +459,8 @@ main (int argc, char *argv[]) dwarf file2 (dw2); bool same = (quiet - ? quiet_cmp () (file1, file2) - : noisy_cmp () (file1, file2)); + ? quiet_compare (file1, file2) + : noisy_compare (file1, file2, verbose)); if (test_writer & 1) test_output (file1, file2, false, file1, file2, same);