Implement task signatures
author    Richard Purdie <rpurdie@linux.intel.com>
          Tue, 31 Aug 2010 13:49:43 +0000 (14:49 +0100)
committer Chris Larson <chris_larson@mentor.com>
          Thu, 30 Dec 2010 06:51:07 +0000 (23:51 -0700)
Includes functionality to find out what changes between two different
signature data dumps.

Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
Signed-off-by: Chris Larson <chris_larson@mentor.com>
bin/bitbake
bin/bitbake-diffsigs [new file with mode: 0755]
lib/bb/cache.py
lib/bb/cooker.py
lib/bb/parse/__init__.py
lib/bb/parse/ast.py
lib/bb/runqueue.py
lib/bb/siggen.py [new file with mode: 0644]
lib/bb/utils.py

diff --git a/bin/bitbake b/bin/bitbake
index 63b1d1d1299c8a55562b927e6e473afe0cf779c0..e14c017c1dc4a35b87be6f2fbe60dc742734902d 100755
@@ -135,6 +135,9 @@ Default BBFILES are the .bb files in the current directory.""")
     parser.add_option("-n", "--dry-run", help = "don't execute, just go through the motions",
                action = "store_true", dest = "dry_run", default = False)
 
+    parser.add_option("-S", "--dump-signatures", help = "don't execute, just dump out the signature construction information",
+               action = "store_true", dest = "dump_signatures", default = False)
+
     parser.add_option("-p", "--parse-only", help = "quit after parsing the BB files (developers only)",
                action = "store_true", dest = "parse_only", default = False)
 
diff --git a/bin/bitbake-diffsigs b/bin/bitbake-diffsigs
new file mode 100755
index 0000000..5eb77ce
--- /dev/null
+++ b/bin/bitbake-diffsigs
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+import os
+import sys
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(sys.argv[0])), 'lib'))
+
+import bb.siggen
+
+if len(sys.argv) > 2:
+    bb.siggen.compare_sigfiles(sys.argv[1], sys.argv[2])
+elif len(sys.argv) > 1:
+    bb.siggen.dump_sigfile(sys.argv[1])
+else:
+    print "Usage: bitbake-diffsigs <sigdatafile> [sigdatafile2]"
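
The script above is a thin wrapper over lib/bb/siggen.py; as a usage sketch, the same two entry points can be called directly (the sigdata paths here are hypothetical, real ones are written next to the stamp files when bitbake runs with the new -S switch):

import bb.siggen

# One file: print the whitelists, hashes, dependency lists and variable
# values recorded in a signature data dump.
bb.siggen.dump_sigfile("stamps/foo-1.0-r0.do_compile.sigdata.1a2b3c")

# Two files: print only what differs between the two dumps.
bb.siggen.compare_sigfiles("run1/foo-1.0-r0.do_compile.sigdata.1a2b3c",
                           "run2/foo-1.0-r0.do_compile.sigdata.4d5e6f")
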
diff --git a/lib/bb/cache.py b/lib/bb/cache.py
index d72622d7489d4030592b3d35ea23af07acd4544a..2d6eac1a9cc85a5b598de06d8b4a45fc545df73c 100644
@@ -43,7 +43,7 @@ except ImportError:
     logger.info("Importing cPickle failed. "
                 "Falling back to a very slow implementation.")
 
-__cache_version__ = "133"
+__cache_version__ = "134"
 
 recipe_fields = (
     'pn',
@@ -70,6 +70,8 @@ recipe_fields = (
     'nocache',
     'variants',
     'file_depends',
+    'tasks',
+    'basetaskhashes',
 )
 
 
@@ -93,21 +95,33 @@ class RecipeInfo(namedtuple('RecipeInfo', recipe_fields)):
         return dict((pkg, cls.depvar("%s_%s" % (var, pkg), metadata))
                     for pkg in packages)
 
+    @classmethod
+    def taskvar(cls, var, tasks, metadata):
+        return dict((task, cls.getvar("%s_task-%s" % (var, task), metadata))
+                    for task in tasks)
+
     @classmethod
     def getvar(cls, var, metadata):
         return metadata.getVar(var, True) or ''
 
     @classmethod
     def from_metadata(cls, filename, metadata):
+        tasks = metadata.getVar('__BBTASKS', False)
+
         pn = cls.getvar('PN', metadata)
         packages = cls.listvar('PACKAGES', metadata)
         if not pn in packages:
             packages.append(pn)
+
         return RecipeInfo(
+            tasks            = tasks,
+            basetaskhashes   = cls.taskvar('BB_BASEHASH', tasks, metadata),
+
             file_depends     = metadata.getVar('__depends', False),
             task_deps        = metadata.getVar('_task_deps', False) or
                                {'tasks': [], 'parents': {}},
             variants         = cls.listvar('__VARIANTS', metadata) + [''],
+
             skipped          = cls.getvar('__SKIPPED', metadata),
             timestamp        = bb.parse.cached_mtime(filename),
             packages         = cls.listvar('PACKAGES', metadata),
@@ -538,6 +552,8 @@ class CacheData(object):
         self.task_deps = {}
         self.stamp = {}
         self.preferred = {}
+        self.tasks = {}
+        self.basetaskhash = {}
 
         """
         Indirect Cache variables
@@ -594,3 +610,6 @@ class CacheData(object):
         if not info.broken and not info.not_world:
             self.possible_world.append(fn)
 
+        for task, taskhash in info.basetaskhashes.iteritems():
+            identifier = '%s.%s' % (fn, task)
+            self.basetaskhash[identifier] = taskhash
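
Both the cache and the signature code key per-task data by recipe filename plus task name; a minimal sketch of the convention (values hypothetical):

# Key convention shared by cache.py and siggen.py: "<recipe file>.<task>"
fn = '/recipes/foo_1.0.bb'
task = 'do_compile'
identifier = '%s.%s' % (fn, task)    # '/recipes/foo_1.0.bb.do_compile'
basetaskhash = {identifier: 'd41d8cd98f00b204e9800998ecf8427e'}
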
diff --git a/lib/bb/cooker.py b/lib/bb/cooker.py
index 1df2bfe8abd100ddd4d040e1b7dcd23ac3bb63b0..0e341158bf5175bc38fe91bfd48a3d68a7133309 100644
@@ -485,6 +485,7 @@ class BBCooker:
                 sys.exit(1)
 
         data = self.configuration.data
+        bb.parse.init_parser(data)
         for f in files:
             data = _parse(f, data)
 
@@ -523,6 +524,7 @@ class BBCooker:
 
         bb.fetch.fetcher_init(self.configuration.data)
         bb.codeparser.parser_cache_init(self.configuration.data)
+        bb.parse.init_parser(data)
         bb.event.fire(bb.event.ConfigParsed(), self.configuration.data)
 
     def handleCollections( self, collections ):
diff --git a/lib/bb/parse/__init__.py b/lib/bb/parse/__init__.py
index 3015d0c8dc4ed83766a66a4f4d0d4cbae3ca2fe3..7f1562e66efeb19e734507a20d8f5b2d97ebed13 100644
@@ -30,6 +30,7 @@ import os
 import logging
 import bb
 import bb.utils
+import bb.siggen
 
 logger = logging.getLogger("BitBake.Parsing")
 
@@ -83,6 +84,9 @@ def init(fn, data):
         if h['supports'](fn):
             return h['init'](data)
 
+def init_parser(d):
+    bb.parse.siggen = bb.siggen.init(d)
+
 def resolve_file(fn, d):
     if not os.path.isabs(fn):
         bbpath = bb.data.getVar("BBPATH", d, True)
diff --git a/lib/bb/parse/ast.py b/lib/bb/parse/ast.py
index 8a02c10f64bed15300f8a635ef109f86b7c320f5..c001f1b45e1331cdd196bfb7000f9a85bb48adbe 100644
@@ -300,7 +300,7 @@ def handleInherit(statements, m):
     n = __word__.findall(files)
     statements.append(InheritNode(m.group(1)))
 
-def finalize(fn, d):
+def finalize(fn, d, variant = None):
     for lazykey in bb.data.getVar("__lazy_assigned", d) or ():
         if bb.data.getVar(lazykey, d) is None:
             val = bb.data.getVarFlag(lazykey, "defaultval", d)
@@ -323,7 +323,7 @@ def finalize(fn, d):
     tasklist = bb.data.getVar('__BBTASKS', d) or []
     bb.build.add_tasks(tasklist, d)
 
-    #bb.data.generate_dependencies(d)
+    bb.parse.siggen.finalise(fn, d, variant)
 
     bb.event.fire(bb.event.RecipeParsed(fn), d)
 
@@ -433,7 +433,7 @@ def multi_finalize(fn, d):
     for variant, variant_d in datastores.iteritems():
         if variant:
             try:
-                finalize(fn, variant_d)
+                finalize(fn, variant_d, variant)
             except bb.parse.SkipPackage:
                 bb.data.setVar("__SKIPPED", True, variant_d)
 
diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 087edace3fe94302e2832c82970dbe45f3237feb..076d3b8f471e3cec1e3b8f2b78396c3c78deda87 100644
@@ -188,6 +188,7 @@ class RunQueueData:
         self.runq_task = []
         self.runq_depends = []
         self.runq_revdeps = []
+        self.runq_hash = []
 
     def runq_depends_names(self, ids):
         import re
@@ -489,6 +490,7 @@ class RunQueueData:
             self.runq_task.append(taskData.tasks_name[task])
             self.runq_depends.append(set(depends))
             self.runq_revdeps.append(set())
+            self.runq_hash.append("")
 
             runq_build.append(0)
             runq_recrdepends.append(recrdepends)
@@ -601,6 +603,7 @@ class RunQueueData:
                 del self.runq_depends[listid-delcount]
                 del runq_build[listid-delcount]
                 del self.runq_revdeps[listid-delcount]
+                del self.runq_hash[listid-delcount]
                 delcount = delcount + 1
                 maps.append(-1)
 
@@ -692,6 +695,19 @@ class RunQueueData:
                 continue
             self.runq_setscene.append(task)
 
+        # Iterate over the task list and call into the siggen code
+        dealtwith = set()
+        todeal = set(range(len(self.runq_fnid)))
+        while len(todeal) > 0:
+            for task in todeal.copy():
+                if len(self.runq_depends[task] - dealtwith) == 0:
+                    dealtwith.add(task)
+                    todeal.remove(task)
+                    procdep = []
+                    for dep in self.runq_depends[task]:
+                        procdep.append(self.taskData.fn_index[self.runq_fnid[dep]] + "." + self.runq_task[dep])
+                    self.runq_hash[task] = bb.parse.siggen.get_taskhash(self.taskData.fn_index[self.runq_fnid[task]], self.runq_task[task], procdep, self.dataCache)
+
         return len(self.runq_fnid)
 
     def dump_data(self, taskQueue):
@@ -843,7 +859,9 @@ class RunQueue:
         fn = self.rqdata.taskData.fn_index[self.rqdata.runq_fnid[task]]
         if taskname is None:
             taskname = self.rqdata.runq_task[task]
-        stampfile = "%s.%s" % (self.rqdata.dataCache.stamp[fn], taskname)
+
+        stampfile = bb.parse.siggen.stampfile(self.rqdata.dataCache.stamp[fn], taskname, self.rqdata.runq_hash[task])
+
         # If the stamp is missing it's not current
         if not os.access(stampfile, os.F_OK):
             logger.debug(2, "Stampfile %s not available", stampfile)
@@ -863,9 +881,10 @@ class RunQueue:
             if iscurrent:
                 fn2 = self.rqdata.taskData.fn_index[self.rqdata.runq_fnid[dep]]
                 taskname2 = self.rqdata.runq_task[dep]
-                stampfile2 = "%s.%s" % (self.rqdata.dataCache.stamp[fn2], taskname2)
+                stampfile2 = bb.parse.siggen.stampfile(self.rqdata.dataCache.stamp[fn2], taskname2, self.rqdata.runq_hash[dep])
+                stampfile3 = bb.parse.siggen.stampfile(self.rqdata.dataCache.stamp[fn2], taskname2 + "_setscene", self.rqdata.runq_hash[dep])
                 t2 = get_timestamp(stampfile2)
-                t3 = get_timestamp(stampfile2 + "_setscene")
+                t3 = get_timestamp(stampfile3)
                 if t3 and t3 > t2:
                    continue
                 if fn == fn2 or (fulldeptree and fn2 not in stampwhitelist):
@@ -892,7 +911,10 @@ class RunQueue:
                 self.state = runQueueSceneInit
 
         if self.state is runQueueSceneInit:
-            self.rqexe = RunQueueExecuteScenequeue(self)
+            if self.cooker.configuration.dump_signatures:
+                self.dump_signatures()
+            else:
+                self.rqexe = RunQueueExecuteScenequeue(self)
 
         if self.state is runQueueSceneRun:
             self.rqexe.execute()
@@ -933,6 +955,21 @@
         else:
             self.rqexe.finish()
 
+    def dump_signatures(self):
+        self.state = runQueueComplete
+        done = set()
+        bb.note("Reparsing files to collect dependency data")
+        for task in range(len(self.rqdata.runq_fnid)):
+            if self.rqdata.runq_fnid[task] not in done:
+                fn = self.rqdata.taskData.fn_index[self.rqdata.runq_fnid[task]]
+                # Reparsing populates the signature generator's dependency data
+                the_data = self.cooker.bb_cache.loadDataFull(fn, self.cooker.get_file_appends(fn), self.cooker.configuration.data)
+                done.add(self.rqdata.runq_fnid[task])
+
+        bb.parse.siggen.dump_sigs(self.rqdata.dataCache)
+
+        return
+
 
 class RunQueueExecute:
 
@@ -1024,6 +1060,7 @@ class RunQueueExecute:
             bb.data.setVar("__RUNQUEUE_DO_NOT_USE_EXTERNALLY2", fn, self.cooker.configuration.data)
             try:
                 the_data = bb.cache.Cache.loadDataFull(fn, self.cooker.get_file_appends(fn), self.cooker.configuration.data)
+                the_data.setVar('BB_TASKHASH', self.rqdata.runq_hash[task])
                 bb.build.exec_task(fn, taskname, the_data)
             except Exception as exc:
                 logger.critical(str(exc))
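
The todeal/dealtwith loop added above only hashes a task once every task it depends on already has a hash, so dependency hashes can be folded in. A self-contained sketch of the same fixed-point iteration, with the folding done by get_taskhash() inlined (function and variable names hypothetical):

import hashlib

def build_taskhashes(depends, basehash):
    # depends: task -> set of tasks it depends on (a DAG)
    # basehash: task -> hash of the task's own code and variable values
    taskhash = {}
    todeal = set(depends)
    while todeal:
        for task in todeal.copy():
            if depends[task].issubset(taskhash):
                # All dependencies are hashed: fold them in, in sorted
                # order, as SignatureGeneratorBasic.get_taskhash does
                data = basehash[task]
                for dep in sorted(depends[task]):
                    data = data + taskhash[dep]
                taskhash[task] = hashlib.md5(data).hexdigest()
                todeal.remove(task)
    return taskhash

# e.g. build_taskhashes({'a': set(), 'b': set(['a'])}, {'a': 'x', 'b': 'y'})
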
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
new file mode 100644
index 0000000..da85039
--- /dev/null
+++ b/lib/bb/siggen.py
@@ -0,0 +1,256 @@
+import hashlib
+import logging
+import os
+import re
+
+import bb
+
+logger = logging.getLogger('BitBake.SigGen')
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+    logger.info('Importing cPickle failed.  Falling back to a very slow implementation.')
+
+def init(d):
+    siggens = [obj for obj in globals().itervalues()
+                      if type(obj) is type and issubclass(obj, SignatureGenerator)]
+
+    desired = bb.data.getVar("BB_SIGNATURE_HANDLER", d, True) or "noop"
+    for sg in siggens:
+        if desired == sg.name:
+            return sg(d)
+    else:
+        logger.error("Invalid signature generator '%s', using default 'noop'\n"
+                     "Available generators: %s",
+                     desired, ', '.join(obj.name for obj in siggens))
+        return SignatureGenerator(d)
+
+class SignatureGenerator(object):
+    """
+    """
+    name = "noop"
+
+    def __init__(self, data):
+        return
+
+    def finalise(self, fn, d, variant):
+        return
+
+    def get_taskhash(self, fn, task, deps, dataCache):
+        return 0
+
+    def stampfile(self, stampbase, taskname, taskhash):
+        return "%s.%s" % (stampbase, taskname)
+
+class SignatureGeneratorBasic(SignatureGenerator):
+    """
+    """
+    name = "basic"
+
+    def __init__(self, data):
+        self.basehash = {}
+        self.taskhash = {}
+        self.taskdeps = {}
+        self.runtaskdeps = {}
+        self.gendeps = {}
+        self.lookupcache = {}
+        self.basewhitelist = (data.getVar("BB_HASHBASE_WHITELIST", True) or "").split()
+        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST", True) or None
+
+        if self.taskwhitelist:
+            self.twl = re.compile(self.taskwhitelist)
+        else:
+            self.twl = None
+
+    def _build_data(self, fn, d):
+
+        taskdeps, gendeps = bb.data.generate_dependencies(d)
+
+        basehash = {}
+        lookupcache = {}
+
+        for task in taskdeps:
+            data = d.getVar(task, False)
+            lookupcache[task] = data
+            for dep in sorted(taskdeps[task]):
+                if dep in self.basewhitelist:
+                    continue
+                if dep in lookupcache:
+                    var = lookupcache[dep]
+                else:
+                    var = d.getVar(dep, False)
+                    lookupcache[dep] = var
+                if var:
+                    data = data + var
+            if data is None:
+                bb.error("Task %s from %s seems to be empty?!" % (task, fn))
+            self.basehash[fn + "." + task] = hashlib.md5(data).hexdigest()
+
+        self.taskdeps[fn] = taskdeps
+        self.gendeps[fn] = gendeps
+        self.lookupcache[fn] = lookupcache
+
+        return taskdeps
+
+    def finalise(self, fn, d, variant):
+
+        if variant:
+            fn = "virtual:" + variant + ":" + fn
+
+        taskdeps = self._build_data(fn, d)
+
+        # Slow but can be useful for debugging mismatched basehashes
+        #for task in self.taskdeps[fn]:
+        #    self.dump_sigtask(fn, task, d.getVar("STAMP", True), False)
+
+        for task in taskdeps:
+            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + "." + task])
+
+    def get_taskhash(self, fn, task, deps, dataCache):
+        k = fn + "." + task
+        data = dataCache.basetaskhash[k]
+        self.runtaskdeps[k] = []
+        for dep in sorted(deps):
+            # The whitelist suppresses hash propagation: when this recipe
+            # is not itself whitelisted, skip deps on whitelisted recipes
+            if self.twl and not self.twl.search(dataCache.pkg_fn[fn]):
+                dep_fn = re.search("(?P<fn>.*)\..*", dep).group('fn')
+                if self.twl.search(dataCache.pkg_fn[dep_fn]):
+                    continue
+            if dep not in self.taskhash:
+                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
+            data = data + self.taskhash[dep]
+            self.runtaskdeps[k].append(dep)
+        h = hashlib.md5(data).hexdigest()
+        self.taskhash[k] = h
+        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
+        return h
+
+    def dump_sigtask(self, fn, task, stampbase, runtime):
+        k = fn + "." + task
+        if runtime == "customfile":
+            sigfile = stampbase
+        elif runtime:
+            sigfile = stampbase + "." + task + ".sigdata" + "." + self.taskhash[k]
+        else:
+            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[k]
+
+        bb.utils.mkdirhier(os.path.dirname(sigfile))
+
+        data = {}
+        data['basewhitelist'] = self.basewhitelist
+        data['taskwhitelist'] = self.taskwhitelist
+        data['taskdeps'] = self.taskdeps[fn][task]
+        data['basehash'] = self.basehash[k]
+        data['gendeps'] = {}
+        data['varvals'] = {}
+        data['varvals'][task] = self.lookupcache[fn][task]
+        for dep in self.taskdeps[fn][task]:
+            if dep in self.basewhitelist:
+                continue
+            data['gendeps'][dep] = self.gendeps[fn][dep]
+            data['varvals'][dep] = self.lookupcache[fn][dep]
+
+        if runtime and runtime != "customfile":
+            data['runtaskdeps'] = self.runtaskdeps[k]
+            data['runtaskhashes'] = {}
+            for dep in data['runtaskdeps']:
+                data['runtaskhashes'][dep] = self.taskhash[dep]
+
+        p = pickle.Pickler(file(sigfile, "wb"), -1)
+        p.dump(data)
+
+    def dump_sigs(self, dataCache):
+        for fn in self.taskdeps:
+            for task in self.taskdeps[fn]:
+                k = fn + "." + task
+                if k not in self.taskhash:
+                    continue
+                if dataCache.basetaskhash[k] != self.basehash[k]:
+                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % k)
+                    bb.error("The mismatched hashes were %s and %s" % (dataCache.basetaskhash[k], self.basehash[k]))
+                self.dump_sigtask(fn, task, dataCache.stamp[fn], True)
+
+def dump_this_task(outfile, d):
+    fn = d.getVar("BB_FILENAME", True)
+    task = "do_" + d.getVar("BB_CURRENTTASK", True)
+    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile")
+
+def compare_sigfiles(a, b):
+    p1 = pickle.Unpickler(file(a, "rb"))
+    a_data = p1.load()
+    p2 = pickle.Unpickler(file(b, "rb"))
+    b_data = p2.load()
+
+    def dict_diff(a, b):
+        sa = set(a.keys())
+        sb = set(b.keys())
+        common = sa & sb
+        changed = set()
+        for i in common:
+            if a[i] != b[i]:
+                changed.add(i)
+        added = sa - sb
+        removed = sb - sa
+        return changed, added, removed
+
+    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
+        print "basewhitelist changed from %s to %s" % (a_data['basewhitelist'], b_data['basewhitelist'])
+
+    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
+        print "taskwhitelist changed from %s to %s" % (a_data['taskwhitelist'], b_data['taskwhitelist'])
+
+    if a_data['taskdeps'] != b_data['taskdeps']:
+        print "Task dependencies changed from %s to %s" % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))
+
+    if a_data['basehash'] != b_data['basehash']:
+        print "basehash changed from %s to %s" % (a_data['basehash'], b_data['basehash'])
+
+    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'])
+    if changed:
+        for dep in changed:
+            print "List of dependencies for variable %s changed from %s to %s" % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])
+    if added:
+        for dep in added:
+            print "Dependency on variable %s was added" % (dep)
+    if removed:
+        for dep in removed:
+            print "Dependency on Variable %s was removed" % (dep)
+
+
+    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
+    if changed:
+        for dep in changed:
+            print "Variable %s value changed from %s to %s" % (dep, a_data['varvals'][dep], b_data['varvals'][dep])
+    if 'runtaskdeps' in a_data and 'runtaskdeps' in b_data and sorted(a_data['runtaskdeps']) != sorted(b_data['runtaskdeps']):
+        print "Tasks this task depends on changed from %s to %s" % (sorted(a_data['runtaskdeps']), sorted(b_data['runtaskdeps']))
+
+    if 'runtaskhashes' in a_data:
+        for dep in a_data['runtaskhashes']:
+            if dep in b_data['runtaskhashes'] and a_data['runtaskhashes'][dep] != b_data['runtaskhashes'][dep]:
+                print "Hash for dependent task %s changed from %s to %s" % (dep, a_data['runtaskhashes'][dep], b_data['runtaskhashes'][dep])
+
+def dump_sigfile(a):
+    p1 = pickle.Unpickler(file(a, "rb"))
+    a_data = p1.load()
+
+    print "basewhitelist: %s" % (a_data['basewhitelist'])
+
+    print "taskwhitelist: %s" % (a_data['taskwhitelist'])
+
+    print "Task dependencies: %s" % (sorted(a_data['taskdeps']))
+
+    print "basehash: %s" % (a_data['basehash'])
+
+    for dep in a_data['gendeps']:
+        print "List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep])
+
+    for dep in a_data['varvals']:
+        print "Variable %s value is %s" % (dep, a_data['varvals'][dep])
+
+    if 'runtaskdeps' in a_data:
+        print "Tasks this task depends on: %s" % (a_data['runtaskdeps'])
+
+    if 'runtaskhashes' in a_data:
+        for dep in a_data['runtaskhashes']:
+            print "Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])
diff --git a/lib/bb/utils.py b/lib/bb/utils.py
index b121b121ce1e15146c312d4dbc2b505980bc90cd..a65825c3c1a08586d15a79514da08cff87f34b16 100644
@@ -479,6 +479,7 @@ def preserved_envvars_list():
         'BB_PRESERVE_ENV',
         'BB_ENV_WHITELIST',
         'BB_ENV_EXTRAWHITE',
+        'BB_TASKHASH',
         'COLORTERM',
         'DBUS_SESSION_BUS_ADDRESS',
         'DESKTOP_SESSION',
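
runqueue.py above exports the computed hash into each task's datastore as BB_TASKHASH, and this whitelist entry preserves the variable when BitBake scrubs the environment, so metadata code can see the hash it runs under. A sketch of a hypothetical Python task using it:

import bb

def do_record_hash(d):
    # BB_TASKHASH is set per task in RunQueueExecute before exec_task()
    taskhash = d.getVar('BB_TASKHASH', True)
    bb.note("%s: running with task hash %s" % (d.getVar('PN', True), taskhash))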