]> git.ipfire.org Git - thirdparty/openembedded/openembedded-core-contrib.git/commitdiff
pybootchartgui: _parse_proc_ps_log rewrite with iterator
author Andrey Bondarenko <abone27@mail.ru>
Tue, 4 Nov 2014 20:25:10 +0000 (01:25 +0500)
committer Armin Kuster <akuster808@gmail.com>
Sat, 4 May 2019 01:33:14 +0000 (18:33 -0700)
Iterators use much less memory, so larger bootcharts
can be processed without triggering the OOM killer or
massive swapping.

On a (big) 11MB tarball this incurs a performance penalty
of about 10% but consumes roughly half the memory.

Before:
23.50user 1.20system 0:24.97elapsed 98%CPU (0avgtext+0avgdata 770048maxresident)k

After:
26.78user 0.44system 0:27.24elapsed 99%CPU (0avgtext+0avgdata 321192maxresident)k

Signed-off-by: Armin Kuster <akuster808@gmail.com>
scripts/pybootchartgui/pybootchartgui/parsing.py

index 0b5063b4f3a1473046b0a3cc3752ca1ea50ea23e..97d28724a4e04e59d606e912299caf82be6c67bb 100644 (file)
@@ -13,6 +13,9 @@
 #  You should have received a copy of the GNU General Public License
 #  along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.
 
+
+import codecs
+import itertools
 import os
 import string
 import re
@@ -269,6 +272,30 @@ def _parse_headers(file):
         return headers, last
     return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0]
 
+def _iter_parse_timed_blocks(file):
+    """Parses (ie., splits) a file into so-called timed-blocks.
+
+    A timed-block consists of a timestamp on a line by itself followed
+    by zero or more lines of data for that point in time.  Blocks are
+    separated by empty lines.
+
+    Return an iterator over timed blocks, so there is no need to keep
+    all the data in memory.  Each item is a (timestamp, lines) tuple:
+    the timestamp converted to int, and the remaining lines of the
+    block with surrounding whitespace stripped.
+
+    Empty blocks (e.g. produced by consecutive empty lines) are skipped,
+    as are blocks whose last line ends with " not running".
+
+    Raises ParseError if the first line of a block is not an integer.
+    """
+    def parse(block):
+        try:
+            return (int(block[0]), block[1:])
+        except ValueError:
+            raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % block[0])
+
+    def wanted(block):
+        # Lines are stored stripped, so the marker must be compared
+        # without a trailing newline or it can never match.
+        return bool(block) and not block[-1].endswith(" not running")
+
+    block = []
+    for line in codecs.iterdecode(file, "utf-8"):
+        if line != "\n":
+            block.append(line.strip())
+            continue
+        # An empty line closes the current block.  Keep reading afterwards:
+        # stopping at the first empty block would silently drop the rest
+        # of the file whenever two empty lines follow each other.
+        if wanted(block):
+            yield parse(block)
+        block = []
+    # The final block is not necessarily terminated by an empty line.
+    if wanted(block):
+        yield parse(block)
+
 def _parse_timed_blocks(file):
     """Parses (ie., splits) a file into so-called timed-blocks. A
     timed-block consists of a timestamp on a line by itself followed
@@ -292,10 +319,18 @@ def _parse_proc_ps_log(writer, file):
      *  cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
      *  kstkesp, kstkeip}
     """
+    timed_blocks = _iter_parse_timed_blocks(file)
+    try:
+        first_timed_block = next(timed_blocks)
+        startTime = first_timed_block[0]
+    except StopIteration:
+        return None
+
     processMap = {}
     ltime = 0
-    timed_blocks = _parse_timed_blocks(file)
-    for time, lines in timed_blocks:
+    timed_blocks_count = 0
+    for time, lines in itertools.chain((first_timed_block,), timed_blocks):
+        timed_blocks_count += 1
         for line in lines:
             if not line: continue
             tokens = line.split(' ')
@@ -325,13 +360,12 @@ def _parse_proc_ps_log(writer, file):
             process.last_sys_cpu_time = sysCpu
         ltime = time
 
-    if len (timed_blocks) < 2:
+    if timed_blocks_count < 2:
         return None
 
-    startTime = timed_blocks[0][0]
-    avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1)
+    avgSampleLength = (ltime - startTime)/(timed_blocks_count - 1)
 
-    return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
+    return ProcessStats (writer, processMap, timed_blocks_count, avgSampleLength, startTime, ltime)
 
 def _parse_taskstats_log(writer, file):
     """