]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Patch #510695: Add TSC profiling for the VM.
authorMartin v. Löwis <martin@v.loewis.de>
Tue, 8 Jun 2004 08:17:44 +0000 (08:17 +0000)
committerMartin v. Löwis <martin@v.loewis.de>
Tue, 8 Jun 2004 08:17:44 +0000 (08:17 +0000)
Doc/lib/libsys.tex
Include/pystate.h
Misc/NEWS
Python/ceval.c
Python/pystate.c
Python/sysmodule.c
README
configure
configure.in
pyconfig.h.in

index 3b813842cada447a95a71d29dc0bc5af2a51b1ea..ceced167eb4f5d20385feadf3744fe7a31a9f977 100644 (file)
@@ -475,6 +475,16 @@ else:
   debugged.
 \end{funcdesc}
 
+\begin{funcdesc}{settscdump}{on_flag}
+  Activate dumping of VM measurements using the Pentium timestamp
+  counter, if \var{on_flag} is true. Deactivate these dumps if
+  \var{on_flag} is false. The function is available only if Python
+  was compiled with \code{--with-tsc}. To understand the output
+  of this dump, read \code{Python/ceval.c}.
+
+  \versionadded{2.4}
+\end{funcdesc}
+
 \begin{datadesc}{stdin}
 \dataline{stdout}
 \dataline{stderr}
index 42391085f9148652034c6239b0ae80d504ffca28..1ff26268521fc06b790fdbe72430c1de3fc5e38a 100644 (file)
@@ -29,6 +29,9 @@ typedef struct _is {
 #ifdef HAVE_DLOPEN
     int dlopenflags;
 #endif
+#ifdef WITH_TSC
+    int tscdump;
+#endif
 
 } PyInterpreterState;
 
index cfd296e3fef0597bbb6dfeee90d758da6d54b7fd..a07d887d18dbccfb6aa166a8ca13a6b9f5b65a85 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -499,6 +499,9 @@ Tools/Demos
 Build
 -----
 
+- Profiling the VM using the Pentium TSC is now possible if Python
+  is configured --with-tsc.
+
 - In order to find libraries, setup.py now also looks in /lib64, for use
   on AMD64.
 
index 2d600141f621e09c8eeaefd6d8fa669b492b756d..a52168c71fd9c3a90ad4ea5d8a8d7c00749d110d 100644 (file)
 
 #include <ctype.h>
 
+#ifdef WITH_TSC
+#include <asm/msr.h>
+
+typedef unsigned long long uint64;
+
+/* Print one line of TSC measurements for the opcode just executed.
+
+   Does nothing unless the current interpreter's tscdump flag is set.
+   The interval intr0..intr1 is subtracted from both the inst0..inst1
+   and the loop0..loop1 intervals before they are written to stderr. */
+void dump_tsc(int opcode, int ticked, uint64 inst0, uint64 inst1,
+             uint64 loop0, uint64 loop1, uint64 intr0, uint64 intr1)
+{
+       uint64 intr, inst, loop;
+       PyThreadState *tstate = PyThreadState_Get();
+       if (!tstate->interp->tscdump)
+               return;
+       intr = intr1 - intr0;
+       inst = inst1 - inst0 - intr;
+       loop = loop1 - loop0 - intr;
+       /* The counters are unsigned; cast so the arguments match the
+          signed %lld conversions in the format string. */
+       fprintf(stderr, "opcode=%03d t=%d inst=%06lld loop=%06lld\n",
+               opcode, ticked, (long long)inst, (long long)loop);
+}
+#endif
+
 /* Turn this on if your compiler chokes on the big switch: */
 /* #define CASE_TOO_BIG 1 */
 
@@ -30,7 +50,11 @@ typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
 
 /* Forward declarations */
 static PyObject *eval_frame(PyFrameObject *);
+#ifdef WITH_TSC
+static PyObject *call_function(PyObject ***, int, uint64*, uint64*);
+#else
 static PyObject *call_function(PyObject ***, int);
+#endif
 static PyObject *fast_function(PyObject *, PyObject ***, int, int, int);
 static PyObject *do_call(PyObject *, PyObject ***, int, int);
 static PyObject *ext_do_call(PyObject *, PyObject ***, int, int, int);
@@ -485,6 +509,44 @@ eval_frame(PyFrameObject *f)
 #define GETITEM(v, i) PyTuple_GetItem((v), (i))
 #endif
 
+#ifdef WITH_TSC
+/* Use Pentium timestamp counter to mark certain events:
+   inst0 -- beginning of switch statement for opcode dispatch
+   inst1 -- end of switch statement (may be skipped)
+   loop0 -- the top of the mainloop
+   loop1 -- place where control returns again to top of mainloop 
+            (may be skipped)
+   intr0 -- beginning of long interruption
+   intr1 -- end of long interruption
+
+   Many opcodes call out to helper C functions.  In some cases, the
+   time in those functions should be counted towards the time for the
+   opcode, but not in all cases.  For example, a CALL_FUNCTION opcode
+   calls another Python function; there's no point in charging all the
+   bytecode executed by the called function to the caller.
+
+   It's hard to make a useful judgement statically.  In the presence
+   of operator overloading, it's impossible to tell if a call will
+   execute new Python code or not.
+
+   It's a case-by-case judgement.  I'll use intr0/intr1 for the
+   following cases:
+
+   EXEC_STMT
+   IMPORT_STAR
+   IMPORT_FROM
+   CALL_FUNCTION (and friends)
+
+ */
+       uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0;
+       int ticked = 0;
+
+       rdtscll(inst0);
+       rdtscll(inst1);
+       rdtscll(loop0);
+       rdtscll(loop1);
+#endif
+
 /* Code access macros */
 
 #define INSTR_OFFSET() (next_instr - first_instr)
@@ -643,6 +705,23 @@ eval_frame(PyFrameObject *f)
        w = NULL;
 
        for (;;) {
+#ifdef WITH_TSC
+               if (inst1 == 0) {
+                       /* Almost surely, the opcode executed a break
+                          or a continue, preventing inst1 from being set
+                          on the way out of the loop.
+                       */
+                       rdtscll(inst1);
+                       loop1 = inst1;
+               }
+               dump_tsc(opcode, ticked, inst0, inst1, loop0, loop1,
+                        intr0, intr1);
+               ticked = 0;
+               inst1 = 0;
+               intr0 = 0;
+               intr1 = 0;
+               rdtscll(loop0);
+#endif
                assert(stack_pointer >= f->f_valuestack); /* else underflow */
                assert(STACK_LEVEL() <= f->f_stacksize);  /* else overflow */
 
@@ -662,6 +741,9 @@ eval_frame(PyFrameObject *f)
                         }
                        _Py_Ticker = _Py_CheckInterval;
                        tstate->tick_counter++;
+#ifdef WITH_TSC
+                       ticked = 1;
+#endif
                        if (things_to_do) {
                                if (Py_MakePendingCalls() < 0) {
                                        why = WHY_EXCEPTION;
@@ -752,6 +834,9 @@ eval_frame(PyFrameObject *f)
 #endif
 
                /* Main switch on opcode */
+#ifdef WITH_TSC
+               rdtscll(inst0);
+#endif
 
                switch (opcode) {
 
@@ -1493,7 +1578,13 @@ eval_frame(PyFrameObject *f)
                        v = SECOND();
                        u = THIRD();
                        STACKADJ(-3);
+#ifdef WITH_TSC
+                       rdtscll(intr0);
+#endif
                        err = exec_statement(f, u, v, w);
+#ifdef WITH_TSC
+                       rdtscll(intr1);
+#endif
                        Py_DECREF(u);
                        Py_DECREF(v);
                        Py_DECREF(w);
@@ -1855,7 +1946,13 @@ eval_frame(PyFrameObject *f)
                                x = NULL;
                                break;
                        }
+#ifdef WITH_TSC
+                       rdtscll(intr0);
+#endif
                        x = PyEval_CallObject(x, w);
+#ifdef WITH_TSC
+                       rdtscll(intr1);
+#endif
                        Py_DECREF(w);
                        SET_TOP(x);
                        if (x != NULL) continue;
@@ -1869,7 +1966,13 @@ eval_frame(PyFrameObject *f)
                                        "no locals found during 'import *'");
                                break;
                        }
+#ifdef WITH_TSC
+                       rdtscll(intr0);
+#endif
                        err = import_all_from(x, v);
+#ifdef WITH_TSC
+                       rdtscll(intr1);
+#endif
                        PyFrame_LocalsToFast(f, 0);
                        Py_DECREF(v);
                        if (err == 0) continue;
@@ -1878,7 +1981,13 @@ eval_frame(PyFrameObject *f)
                case IMPORT_FROM:
                        w = GETITEM(names, oparg);
                        v = TOP();
+#ifdef WITH_TSC
+                       rdtscll(intr0);
+#endif
                        x = import_from(v, w);
+#ifdef WITH_TSC
+                       rdtscll(intr1);
+#endif
                        PUSH(x);
                        if (x != NULL) continue;
                        break;
@@ -1987,7 +2096,11 @@ eval_frame(PyFrameObject *f)
 
                case CALL_FUNCTION:
                        PCALL(PCALL_ALL);
+#ifdef WITH_TSC
+                       x = call_function(&stack_pointer, oparg, &intr0, &intr1);
+#else
                        x = call_function(&stack_pointer, oparg);
+#endif
                        PUSH(x);
                        if (x != NULL)
                                continue;
@@ -2022,7 +2135,13 @@ eval_frame(PyFrameObject *f)
                            n++;
                    } else
                            Py_INCREF(func);
+#ifdef WITH_TSC
+                   rdtscll(intr0);
+#endif
                    x = ext_do_call(func, &stack_pointer, flags, na, nk);
+#ifdef WITH_TSC
+                   rdtscll(intr1);
+#endif
                    Py_DECREF(func);
 
                    while (stack_pointer > pfunc) {
@@ -2134,6 +2253,10 @@ eval_frame(PyFrameObject *f)
 
            on_error:
 
+#ifdef WITH_TSC
+               rdtscll(inst1);
+#endif
+
                /* Quickly continue if no error occurred */
 
                if (why == WHY_NOT) {
@@ -2143,9 +2266,15 @@ eval_frame(PyFrameObject *f)
                                if (PyErr_Occurred())
                                        fprintf(stderr,
                                                "XXX undetected error\n");
-                               else
+                               else {
+#endif
+#ifdef WITH_TSC
+                                       rdtscll(loop1);
 #endif
                                        continue; /* Normal, fast path */
+#ifdef CHECKEXC
+                               }
+#endif
                        }
                        why = WHY_EXCEPTION;
                        x = Py_None;
@@ -2260,6 +2389,9 @@ fast_block_end:
 
                if (why != WHY_NOT)
                        break;
+#ifdef WITH_TSC
+               rdtscll(loop1);
+#endif
 
        } /* main loop */
 
@@ -3331,7 +3463,11 @@ if (tstate->use_tracing) { \
 
 
 static PyObject *
-call_function(PyObject ***pp_stack, int oparg)
+call_function(PyObject ***pp_stack, int oparg
+#ifdef WITH_TSC
+               , uint64* pintr0, uint64* pintr1
+#endif
+               )
 {
        int na = oparg & 0xff;
        int nk = (oparg>>8) & 0xff;
@@ -3374,7 +3510,13 @@ call_function(PyObject ***pp_stack, int oparg)
                        PyObject *callargs;
                        callargs = load_args(pp_stack, na);
                        BEGIN_C_TRACE
+#ifdef WITH_TSC
+                       rdtscll(*pintr0);
+#endif
                        x = PyCFunction_Call(func, callargs, NULL);
+#ifdef WITH_TSC
+                       rdtscll(*pintr1);
+#endif
                        END_C_TRACE
                        Py_XDECREF(callargs);
                }
@@ -3393,10 +3535,16 @@ call_function(PyObject ***pp_stack, int oparg)
                        n++;
                } else
                        Py_INCREF(func);
+#ifdef WITH_TSC
+               rdtscll(*pintr0);
+#endif
                if (PyFunction_Check(func))
                        x = fast_function(func, pp_stack, n, na, nk);
                else
                        x = do_call(func, pp_stack, na, nk);
+#ifdef WITH_TSC
+               rdtscll(*pintr1);
+#endif
                Py_DECREF(func);
        }
 
index b0eb4c48a72bdb2d4b7b1314df6fc26c07fc3bca..6a8beb940cd16c44a012cb75e939c80733f0cccb 100644 (file)
@@ -58,6 +58,9 @@ PyInterpreterState_New(void)
 #else
                interp->dlopenflags = RTLD_LAZY;
 #endif
+#endif
+#ifdef WITH_TSC
+               interp->tscdump = 0;
 #endif
 
                HEAD_LOCK();
index 09e411e288aa9ac7a36f665992d7411dea7c2ce3..55c04251754ebd82fcdff954c9c3d9b12aa0eff6 100644 (file)
@@ -442,6 +442,33 @@ PyDoc_STRVAR(getcheckinterval_doc,
 "getcheckinterval() -> current check interval; see setcheckinterval()."
 );
 
+#ifdef WITH_TSC
+/* sys.settscdump(on_flag): switch the TSC dump of the current
+   interpreter on or off.
+
+   Parses a single integer argument.  Any nonzero value stores 1 in the
+   interpreter's tscdump flag, zero stores 0.  Returns None, or NULL if
+   argument parsing fails. */
+static PyObject *
+sys_settscdump(PyObject *self, PyObject *args)
+{
+       /* Deliberately not named "bool": that identifier becomes a macro
+          as soon as <stdbool.h> is included. */
+       int on_flag;
+       PyThreadState *tstate = PyThreadState_Get();
+
+       if (!PyArg_ParseTuple(args, "i:settscdump", &on_flag))
+               return NULL;
+       /* Normalize to exactly 0 or 1. */
+       tstate->interp->tscdump = (on_flag != 0);
+       Py_INCREF(Py_None);
+       return Py_None;
+}
+
+PyDoc_STRVAR(settscdump_doc,
+"settscdump(bool)\n\
+\n\
+If true, tell the Python interpreter to dump VM measurements to\n\
+stderr.  If false, turn off dump.  The measurements are based on the\n\
+Pentium time-stamp counter."
+);
+#endif /* WITH_TSC */
+
 static PyObject *
 sys_setrecursionlimit(PyObject *self, PyObject *args)
 {
@@ -743,6 +770,9 @@ static PyMethodDef sys_methods[] = {
        {"setprofile",  sys_setprofile, METH_O, setprofile_doc},
        {"setrecursionlimit", sys_setrecursionlimit, METH_VARARGS,
         setrecursionlimit_doc},
+#ifdef WITH_TSC
+       {"settscdump", sys_settscdump, METH_VARARGS, settscdump_doc},
+#endif
        {"settrace",    sys_settrace, METH_O, settrace_doc},
        {"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc},
        {NULL,          NULL}           /* sentinel */
diff --git a/README b/README
index 9997ea2c757d3502329cf57dfc186c8894b6b97a..f020d3aa6c4c82cde75697e71822e6306e76895d 100644 (file)
--- a/README
+++ b/README
@@ -1026,6 +1026,7 @@ Modules/getpath.o.
        in files. Python code can open a file with open(file, 'U') to
        read it in universal newline mode. THIS OPTION IS UNSUPPORTED.
 
+--with-tsc: Profile using the Pentium timestamp counter (TSC).
 
 Building for multiple architectures (using the VPATH feature)
 -------------------------------------------------------------
index 5619b2cd7750b69042742902c309a934f9dc03e2..697f575d46a1256171e911b0648961ca612ec4aa 100755 (executable)
--- a/configure
+++ b/configure
@@ -1,5 +1,5 @@
 #! /bin/sh
-# From configure.in Revision: 1.455 .
+# From configure.in Revision: 1.456 .
 # Guess values for system-dependent variables and create Makefiles.
 # Generated by GNU Autoconf 2.57 for python 2.4.
 #
@@ -867,6 +867,7 @@ Optional Packages:
                           deprecated; use --with(out)-threads
   --with-pth              use GNU pth threading libraries
   --with(out)-doc-strings disable/enable documentation strings
+  --with(out)-tsc         enable/disable timestamp counter profile
   --with(out)-pymalloc    disable/enable specialized mallocs
   --with-wctype-functions use wctype.h functions
   --with-fpectl           enable SIGFPE catching
 echo "$as_me:$LINENO: result: $with_doc_strings" >&5
 echo "${ECHO_T}$with_doc_strings" >&6
 
+# Check for --with-tsc (timestamp counter profiling)
+echo "$as_me:$LINENO: checking for --with-tsc" >&5
+echo $ECHO_N "checking for --with-tsc... $ECHO_C" >&6
+
+# Check whether --with-tsc or --without-tsc was given.
+if test "${with_tsc+set}" = set; then
+  withval="$with_tsc"
+
+if test "$withval" != no
+then
+
+cat >>confdefs.h <<\_ACEOF
+#define WITH_TSC 1
+_ACEOF
+
+    echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+else echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+else
+  echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi;
+
 # Check for Python-specific malloc support
 echo "$as_me:$LINENO: checking for --with-pymalloc" >&5
 echo $ECHO_N "checking for --with-pymalloc... $ECHO_C" >&6
index cd84ea9abf59ad047b8085a9dc3285fff56b84e5..cff0ed3f0684539c1fa7928531b61e3daca7c291 100644 (file)
@@ -1928,6 +1928,19 @@ then
 fi
 AC_MSG_RESULT($with_doc_strings)
 
+# Check for --with-tsc (timestamp counter profiling)
+AC_MSG_CHECKING(for --with-tsc)
+AC_ARG_WITH(tsc,
+[  --with(out)-tsc         enable/disable timestamp counter profile], [
+if test "$withval" != no
+then 
+  AC_DEFINE(WITH_TSC, 1, 
+    [Define to profile with the Pentium timestamp counter]) 
+    AC_MSG_RESULT(yes)
+else AC_MSG_RESULT(no)
+fi],
+[AC_MSG_RESULT(no)])
+
 # Check for Python-specific malloc support
 AC_MSG_CHECKING(for --with-pymalloc)
 AC_ARG_WITH(pymalloc,
index fd4a12408328b561da62cc313525f92c6621a75d..8cb0cffb78c3ff4d9b26fedec0659d3285ea4c46 100644 (file)
 /* Define if you want to compile in rudimentary thread support */
 #undef WITH_THREAD
 
+/* Define to profile with the Pentium timestamp counter */
+#undef WITH_TSC
+
 /* Define to 1 if your processor stores words with the most significant byte
    first (like Motorola and SPARC, unlike Intel and VAX). */
 #undef WORDS_BIGENDIAN