Fix tracker circular ref handling for sharing differences. dwarfcmp -l improvements.

author Roland McGrath <roland@redhat.com>

Wed, 26 Aug 2009 04:14:38 +0000 (21:14 -0700)

committer Roland McGrath <roland@redhat.com>

Wed, 26 Aug 2009 04:14:38 +0000 (21:14 -0700)
author Roland McGrath <roland@redhat.com>
Wed, 26 Aug 2009 04:14:38 +0000 (21:14 -0700)
committer Roland McGrath <roland@redhat.com>
Wed, 26 Aug 2009 04:14:38 +0000 (21:14 -0700)
diff --git a/libdw/ChangeLog b/libdw/ChangeLog

index 090e3be6876dbf78cfba4be84e4db990764536c2..308fda3efc68c26e9e94c14e48b9d8f22da1cb62 100644 (file)
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,9 @@
+2009-08-25  Roland McGrath  <roland@redhat.com>
+
+       * c++/dwarf_tracker: Soup up circular reference handling to handle
+       mismatched duplication vs sharing in semantically matching trees.
+       * c++/dwarf_comparator: Update user.
+
  2009-08-21  Roland McGrath  <roland@redhat.com>
  
         * c++/subr.hh (hash<std::string>): Use elf_gnu_hash algorithm.
diff --git a/libdw/c++/dwarf_comparator b/libdw/c++/dwarf_comparator

index 3e136639197334317ab3b3560f17a4464ec2a735..e3467751745c05405619dec28973d2973f8cee4d 100644 (file)
--- a/libdw/c++/dwarf_comparator
+++ b/libdw/c++/dwarf_comparator
@@ -142,23 +142,34 @@ namespace elfutils
        return false;
      }
  
-    struct reference_match
+    struct reference_match {};
+
+    // This call is used purely in hopes of a cache hit.
+    inline bool prematch (reference_match &, const die1 &, const die2 &)
      {
-      inline bool cannot_match () const
-      {
-       return false;
-      }
-      inline void notice_match (const die2 &, bool) const
-      {
-      }
-    };
+      return false;
+    }
  
+    // This call is used only as part of a real reference lookup.
      inline bool reference_matched (reference_match &,
                                    const die1 &, const die2 &)
      {
        return false;
      }
  
+    // Check for a negative cache hit after prematch or reference_match.
+    inline bool cannot_match (reference_match &, const die1 &, const die2 &)
+    {
+      return false;
+    }
+
+    // This can cache a result.
+    inline bool notice_match (reference_match &, const die1 &, const die2 &,
+                             bool result)
+    {
+      return result;
+    }
+
      inline dwarf_tracker_base ()
      {}
  
@@ -235,9 +246,13 @@ namespace elfutils
      inline bool match (const die1 &a, const die2 &b)
      {
        _m_tracker.visit (a, b);
-      return (a.tag () == b.tag ()
-             && match (a.attributes (), b.attributes ())
-             && match (a.children (), b.children ()));
+      if (a.tag () != b.tag ())
+       return nomatch (a, b, "DIE tag");
+      if (!match (a.attributes (), b.attributes ()))
+       return nomatch (a, b, "DIE attrs");
+      if (! match (a.children (), b.children ()))
+       return nomatch (a, b, "DIE children");
+      return true;
      }
  
      template<typename in, typename out>
@@ -285,8 +300,7 @@ namespace elfutils
                                     bool>
      {
        dwarf_comparator &_m_cmp;
-      match_sorted (dwarf_comparator<dwarf1, dwarf2,
-                   ignore_refs, tracker> &cmp)
+      match_sorted (dwarf_comparator<dwarf1, dwarf2, ignore_refs, tracker> &cmp)
         : _m_cmp (cmp)
        {}
  
@@ -332,7 +346,7 @@ namespace elfutils
           populate (sorted2, b);
  
           std::pair<typename ait1_map::iterator,
-           typename ait2_map::iterator> result
+                   typename ait2_map::iterator> result
             = std::mismatch (sorted1.begin (), sorted1.end (),
                              sorted2.begin (), match_sorted (*this));
           if (result.first == sorted1.end ()
@@ -361,18 +375,16 @@ namespace elfutils
      {
        // Maybe the tracker has already cached a correspondence of DIEs.
        typename tracker::reference_match matched;
-      if (_m_tracker.reference_matched (matched, a, b))
+      if (_m_tracker.prematch (matched, a, b))
         return true;
  
-      if (matched.cannot_match ())
-       return false;
+      if (_m_tracker.cannot_match (matched, a, b))
+       return nomatch (*a, *b, "children cached");
  
        bool result = match_child (a, b);
  
        // Let the tracker cache a result for its reference_matched.
-      matched.notice_match (b, result);
-
-      return result;
+      return _m_tracker.notice_match (matched, a, b, result);
      }
  
      inline bool match (const children1 &a, const children2 &b)
@@ -458,6 +470,12 @@ namespace elfutils
        return false;
      }
  
+    // This is a convenient place to hack for debugging output and such.
+    inline bool nomatch (const die1 &, const die2 &, const char *)
+    {
+      return false;
+    }
+
      /* We call references equal if they are literally the same DIE,
         or if they are identical subtrees sitting in matching contexts.
         The tracker's context_match method decides what that means.  */
@@ -474,19 +492,19 @@ namespace elfutils
  
        // Simplest mismatches with the cheapest checks first.
        if (a.tag () != b.tag ())
-       return false;
+       return nomatch (a, b, "tag");
  
        const bool has_children = a.has_children ();
        if (has_children != b.has_children ())
-       return false;
+       return nomatch (a, b, "has_children");
  
        // Maybe the tracker has already cached a correspondence of references.
        typename tracker::reference_match matched;
        if (_m_tracker.reference_matched (matched, ref1, ref2))
         return true;
  
-      if (matched.cannot_match ())
-       return false;
+      if (_m_tracker.cannot_match (matched, ref1, ref2))
+       return nomatch (a, b, "cached");
  
        // Now we really have to get the tracker involved.
        const typename tracker::left_context_type &lhs
@@ -496,10 +514,12 @@ namespace elfutils
  
        /* First do the cheap mismatch check on the contexts, then check the
          contents and contexts in ascending order of costliness of a check.  */
-      if (_m_tracker.context_quick_mismatch (lhs, rhs)
-         || !match (a.attributes (), b.attributes ())
-         || !_m_tracker.context_match (lhs, rhs))
-       return false;
+      if (_m_tracker.context_quick_mismatch (lhs, rhs))
+       return nomatch (a, b, "quick context");
+      if (!match (a.attributes (), b.attributes ()))
+       return nomatch (a, b, "attribute");
+      if (!_m_tracker.context_match (lhs, rhs))
+       return nomatch (a, b, "context");
  
        /* To compare the children, we have to clone the tracker and use a
          new one, in case of any reference attributes in their subtrees.
@@ -510,12 +530,12 @@ namespace elfutils
         {
           typename tracker::subtracker t (_m_tracker, matched, lhs, rhs);
           result = subcomparator (t).equals (a.children (), b.children ());
+         if (!result)
+           result = nomatch (a, b, "children");
         }
  
        // Let the tracker cache a result for its reference_matched.
-      matched.notice_match (ref2, result);
-
-      return result;
+      return _m_tracker.notice_match (matched, ref1, ref2, result);
      }
  
    public:
diff --git a/libdw/c++/dwarf_tracker b/libdw/c++/dwarf_tracker

index acff400cd00bfd8975b55b2262cdcb15d312c422..6f6351c36bb48e2542ac1a0c82fc842af1d33594 100644 (file)
--- a/libdw/c++/dwarf_tracker
+++ b/libdw/c++/dwarf_tracker
@@ -382,6 +382,7 @@ namespace elfutils
      typedef typename _base::die1 die1;
      typedef typename _base::die2 die2;
      typedef typename _base::dwarf1_ref dwarf1_ref;
+    class reference_match;
  
    protected:
      typedef dwarf_path_finder<dwarf1> tracker1;
@@ -406,6 +407,10 @@ namespace elfutils
        }
      };
  
+    typedef std::tr1::unordered_map<dwarf::debug_info_entry::identity_type,
+                                   reference_match *> active_map;
+    active_map _m_active;
+
      typedef std::pair<const die2 *,
                       std::tr1::unordered_set<die2, ref_hasher, same_ref>
                       > equiv_list;
@@ -419,6 +424,10 @@ namespace elfutils
        return &(*_m_equiv)[a->identity ()];
      }
  
+    typedef dwarf_tracker_base<dwarf1, dwarf2> context_tracker;
+    typedef dwarf_comparator<dwarf1, dwarf2, true,
+                            context_tracker> context_comparator;
+
      /* Predicate for DIEs "equal enough" to match as context for a subtree.
         The definition we use is that the DIE has the same tag and all its
         attributes are equal, excepting that references in attribute values
@@ -429,9 +438,9 @@ namespace elfutils
        {
         if (a->tag () != b->tag ())
           return false;
-       dwarf_tracker_base<dwarf1, dwarf2> t;
-       return (dwarf_comparator<dwarf1, dwarf2, true> (t)
-               .equals (a->attributes (), b->attributes ()));
+       context_tracker t;
+       return context_comparator (t).equals (a->attributes (),
+                                             b->attributes ());
        }
      };
  
@@ -509,51 +518,65 @@ namespace elfutils
      class reference_match
      {
        friend class dwarf_ref_tracker;
-    private:
-      equiv_list *_m_elt;
+    protected:
+      equiv_list *_m_lhs;
+      typename active_map::value_type *_m_rhs;
+      active_map *_m_active;
  
      public:
  
        inline reference_match ()
-       : _m_elt (NULL)
+       : _m_lhs (NULL), _m_rhs (NULL), _m_active (NULL)
        {}
  
        inline ~reference_match ()
        {
-       if (_m_elt != NULL)
-         _m_elt->first = NULL;
-      }
-
-      inline bool cannot_match () const
-      {
-       return _m_elt == NULL;
-      }
-
-      inline void notice_match (const die2 &b, bool matches) const
-      {
-       if (matches && _m_elt != NULL)
-         _m_elt->second.insert (b);
+       if (_m_lhs != NULL)
+         _m_lhs->first = NULL;
+       if (_m_rhs != NULL)
+         _m_active->erase (_m_rhs->first);
        }
      };
  
+    /* A prematch during the main tree walk does the same cache lookup
+       as real reference matching.  But it doesn't record itself as a
+       "walk in progress" for the circularity-catching logic.  Doing so
+       can break that logic for comparison purposes.  Since we key our
+       cache on identity, a lookup can hit a shared DIE as well as one
+       that is truly involved in our current walk.  If we hit a shared
+       DIE on the main walk, and within that recursion (i.e. somewhere
+       in its children or along its own references) we encounter a
+       circularity, we'd take the main-walk's equiv_list record as the
+       root of the circularity on one side, while on the other side the
+       DIEs may not have been shared and so the same circularity is
+       actually rooted at the second instance of an identical DIE.  */
+    inline bool prematch (reference_match &matched,
+                         const die1 &a, const die2 &b)
+    {
+      return reference_matched (matched, a, b, false);
+    }
+
      inline bool
-    reference_matched (reference_match &matched, const die1 &a, const die2 &b)
+    reference_matched (reference_match &matched, const die1 &a, const die2 &b,
+                      bool record = true)
      {
        equiv_list *elt = equiv_to (a);
        if (elt->first == NULL)
         {
-         /* Record that we have a walk in progress crossing A.
-            When MATCHED goes out of scope in our caller, its
-            destructor will reset ELT->first to clear this record.  */
-         elt->first = &b;
-         matched._m_elt = elt;
+         matched._m_lhs = elt;
+
+         if (record)
+           /* Record that we have a walk in progress crossing A.
+              When MATCHED goes out of scope in our caller, its
+              destructor will reset ELT->first to clear this record.  */
+           elt->first = &b;
  
           // Short-circuit if we have already matched B to A.
           return elt->second.find (b) != elt->second.end ();
         }
  
-      /* We have a circularity.  We can tell because ELT->first remains
-        set from an outer recursion still in progress.
+      /* We have a circularity on the left-hand side.  We can tell because
+        ELT->first remains set from an outer recursion still in progress.
  
          The circular chain of references rooted at A matches B if B is
          also the root of its own circularity and everything along those
@@ -564,7 +587,70 @@ namespace elfutils
          We actually record the pointer on the caller's stack rather
          than a copy of B, just because the iterator might be larger.  */
  
-      return *elt->first == b;
+      if ((*elt->first)->identity () == b->identity ())
+       return true;
+
+      /* Our right-hand side is not in lock-step on a matching circularity.
+        But it's still possible this is a matching reference nonetheless.
+        A difference in the sharing vs duplication of DIEs between the
+        left-hand and right-hand sides could mean that one side's chain of
+        references reaches the same cycle sooner than the other's.
+
+        Consider:
+
+               A1 -> A2 -> ... -> A1' -> A2 ...
+               B1 -> B2 -> ... -> B1  -> B2 ...
+
+        Here A1' is an identical copy of A1, duplicated in the A object.
+        Otherwise A1 matches B1, A2 matches B2, etc.  The B walk started
+        at B1 and hits it again at the step comparing B1 to A1'.  But the
+        A walk has not hit A1 again yet (and perhaps it never will), so
+        our test above does not match.
+
+        This is the simplest example.  There might be more steps of the
+        reference chain that have duplicates on one side but have been
+        consolidated to a single entry on the other.  There can also be
+        multiple reference attributes at each node that differ on this
+        issue, making all manner of tangled graphs that all really match
+        the same simpler graph (and thus each other).
+
+        Now we start recording the state of the right-hand side reference
+        chain walk, and keep going.  When the right-hand side then becomes
+        circular, we check that it has coincided with the left-hand side.
+
+        This is guaranteed to terminate, at least.  It should never have
+        any false positives, since that continuing walk would eventually
+        find the differences.  We hope it doesn't have any false negatives
+        either, but to be sure of that would require more graph theory
+        than your humble writer can bring to bear.  */
+
+      const std::pair<typename active_map::iterator, bool> p
+       = _m_active.insert (std::make_pair (b->identity (), &matched));
+      if (p.second)
+       {
+         assert (p.first->second == &matched);
+         matched._m_lhs = elt;
+         matched._m_active = &_m_active;
+         matched._m_rhs = &*p.first;
+         return false;
+       }
+
+      assert (p.first->second != &matched);
+      return p.first->second->_m_lhs == elt;
+    }
+
+    inline bool cannot_match (reference_match &matched,
+                             const die1 &, const die2 &)
+    {
+      return matched._m_lhs == NULL && matched._m_rhs == NULL;
+    }
+
+    inline bool notice_match (reference_match &matched,
+                             const die1 &, const die2 &b, bool matches)
+    {
+      if (matches && matched._m_lhs != NULL)
+       matched._m_lhs->second.insert (b);
+      return matches;
      }
  
      typedef dwarf_ref_tracker subtracker;
@@ -578,7 +664,7 @@ namespace elfutils
         _m_right (proto._m_right, rhs),
         _m_equiv (proto._m_equiv), _m_delete_equiv (false)
      {
-      // We are starting a recursive consideration of a vs b.
+      // We are starting a recursive consideration of LHS vs RHS.
      }
    };
  };
diff --git a/src/ChangeLog b/src/ChangeLog

index dd71bf43ee3c9c06996286ea9aad0b35b195fe6e..c10f67b6c227027828ece0a461bc78b58adda950 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2009-08-25  Roland McGrath  <roland@redhat.com>
+
+       * dwarfcmp.cc (talker): Track real vs fake-positive match result
+       and cache only real results in the real tracker.
+
  2009-08-20  Roland McGrath  <roland@redhat.com>
  
         * dwarfcmp.cc (verbose): New variable.
diff --git a/src/dwarfcmp.cc b/src/dwarfcmp.cc

index 6e51e2d1a612636be15ba659708df6ecb2c2cbdc..5ef182e9d4f58542b5019ff4608ea7c8eed4bbf3 100644 (file)
--- a/src/dwarfcmp.cc
+++ b/src/dwarfcmp.cc
@@ -130,7 +130,7 @@ open_file (const char *fname, int *fdp)
  
  // XXX make translation-friendly
  
-template<class dwarf1, class dwarf2>
+template<class dwarf1, class dwarf2, bool print_all>
  struct talker : public dwarf_ref_tracker<dwarf1, dwarf2>
  {
    typedef dwarf_tracker_base<dwarf1, dwarf2> _base;
@@ -145,6 +145,7 @@ struct talker : public dwarf_ref_tracker<dwarf1, dwarf2>
    const typename dwarf1::debug_info_entry *a_;
    const typename dwarf2::debug_info_entry *b_;
    bool result_;
+  bool visiting_result_;
  
    inline talker ()
      : a_ (NULL), b_ (NULL), result_ (true)
@@ -158,12 +159,6 @@ struct talker : public dwarf_ref_tracker<dwarf1, dwarf2>
    {
    }
  
-  inline bool keep_going ()
-  {
-    result_ = false;
-    return verbose;
-  }
-
    inline ostream &location () const
    {
      return cout << hex << a_->offset () << " vs " << b_->offset () << ": ";
@@ -174,12 +169,35 @@ struct talker : public dwarf_ref_tracker<dwarf1, dwarf2>
    {
      a_ = &a;
      b_ = &b;
-    if (a.tag () != b.tag ())
+    visiting_result_ = a.tag () == b.tag ();
+    if (!visiting_result_)
        location () << dwarf::tags::name (a.tag ())
                   << " vs "
                   << dwarf::tags::name (b.tag ());
    }
  
+  inline bool keep_going ()
+  {
+    result_ = visiting_result_ = false;
+    return print_all;
+  }
+
+  inline bool notice_match (typename subtracker::reference_match &matched,
+                           const die1 &a, const die2 &b, bool matches)
+  {
+    /* In the main walk, a mismatch would have gone to keep_going.
+       But in reference_match, the comparator uses a subtracker.
+       It won't have set visiting_result_, but it will return a
+       real non-matching result we can catch here.  */
+    if (!matches)
+      visiting_result_ = false;
+
+    // Record the real result in the cache, not a fake match for -l.
+    subtracker::notice_match (matched, a, b, visiting_result_);
+
+    return matches || keep_going ();
+  }
+
    inline bool mismatch (cu1 &it1, const cu1 &end1,
                         cu2 &it2, const cu2 &end2)
    {
@@ -259,8 +277,9 @@ struct talker : public dwarf_ref_tracker<dwarf1, dwarf2>
    inline void reference_mismatch (const die1 &ref1, const die2 &ref2)
    {
      subcomparator cmp (*(subtracker *) this);
-    if (cmp.equals (ref1, ref2))
-      cout << " (XXX refs now equal again!)";
+    if (cmp.equals (ref1, ref2) && !print_all)
+      cout << " (XXX refs now equal again!)"
+          << (cmp.equals (*ref1, *ref2) ? "" : " (and not identical!!)");
      else if (cmp.equals (*ref1, *ref2))
        cout << " (identical but contexts mismatch)";
      else
@@ -293,10 +312,17 @@ struct quiet_cmp
    : public cmp<dwarf1, dwarf2, dwarf_ref_tracker<dwarf1, dwarf2> >
  {};
  
-// To be noisy, the talker wraps the standard tracker with verbosity hooks.
  template<class dwarf1, class dwarf2>
+static inline bool
+quiet_compare (const dwarf1 &a, const dwarf2 &b)
+{
+  return quiet_cmp<dwarf1, dwarf2> () (a, b);
+}
+
+// To be noisy, the talker wraps the standard tracker with verbosity hooks.
+template<class dwarf1, class dwarf2, bool print_all>
  struct noisy_cmp
-  : public cmp<dwarf1, dwarf2, talker<dwarf1, dwarf2> >
+  : public cmp<dwarf1, dwarf2, talker<dwarf1, dwarf2, print_all> >
  {
    inline bool operator () (const dwarf1 &a, const dwarf2 &b)
    {
@@ -305,6 +331,23 @@ struct noisy_cmp
  };
  
  
+template<class dwarf1, class dwarf2, bool print_all>
+static inline bool
+noisy_compare (const dwarf1 &a, const dwarf2 &b)
+{
+  return noisy_cmp<dwarf1, dwarf2, print_all> () (a, b);
+}
+
+template<class dwarf1, class dwarf2>
+static inline bool
+noisy_compare (const dwarf1 &a, const dwarf2 &b, bool print_all)
+{
+  return (print_all
+         ? noisy_cmp<dwarf1, dwarf2, true> () (a, b)
+         : noisy_cmp<dwarf1, dwarf2, false> () (a, b));
+}
+
+
  // Test that one comparison works as expected.
  template<class dwarf1, class dwarf2>
  static void
@@ -313,7 +356,7 @@ test_compare (const dwarf1 &file1, const dwarf2 &file2, bool expect)
    if (quiet_cmp<dwarf1, dwarf2> () (file1, file2) != expect)
      {
        if (expect)
-       noisy_cmp<dwarf1, dwarf2> () (file1, file2);
+       noisy_compare (file1, file2, true);
        throw std::logic_error (__PRETTY_FUNCTION__);
      }
  }
@@ -416,8 +459,8 @@ main (int argc, char *argv[])
        dwarf file2 (dw2);
  
        bool same = (quiet
-                  ? quiet_cmp<dwarf, dwarf> () (file1, file2)
-                  : noisy_cmp<dwarf, dwarf> () (file1, file2));
+                  ? quiet_compare (file1, file2)
+                  : noisy_compare (file1, file2, verbose));
  
        if (test_writer & 1)
         test_output (file1, file2, false, file1, file2, same);
author	Roland McGrath <roland@redhat.com>
	Wed, 26 Aug 2009 04:14:38 +0000 (21:14 -0700)
committer	Roland McGrath <roland@redhat.com>
	Wed, 26 Aug 2009 04:14:38 +0000 (21:14 -0700)
libdw/ChangeLog		patch \| blob \| blame \| history
libdw/c++/dwarf_comparator		patch \| blob \| blame \| history
libdw/c++/dwarf_tracker		patch \| blob \| blame \| history
src/ChangeLog		patch \| blob \| blame \| history
src/dwarfcmp.cc		patch \| blob \| blame \| history