Implement --weird-hacks=truncate-writes to limit the size of write syscalls

author Julian Seward <jseward@acm.org>

Sun, 30 Jun 2002 12:44:54 +0000 (12:44 +0000)

committer Julian Seward <jseward@acm.org>

Sun, 30 Jun 2002 12:44:54 +0000 (12:44 +0000)
author Julian Seward <jseward@acm.org>
Sun, 30 Jun 2002 12:44:54 +0000 (12:44 +0000)
committer Julian Seward <jseward@acm.org>
Sun, 30 Jun 2002 12:44:54 +0000 (12:44 +0000)
diff --git a/cachegrind/docs/manual.html b/cachegrind/docs/manual.html

index ba6ef5c4189f7f86cc57d38dcffc6cc7e5b35438..b86962c0394abbaec24f3c7bdfdfe290183adae0 100644 (file)
--- a/cachegrind/docs/manual.html
+++ b/cachegrind/docs/manual.html
@@ -619,6 +619,37 @@ follows:
            To find out if your program is blocking unexpectedly in the
            <code>read</code> system call, run with
            <code>--trace-syscalls=yes</code> flag.
+      <p>
+      <li><code>truncate-writes</code> Use this if you have a threaded
+          program which appears to unexpectedly block whilst writing
+          into a pipe.  The effect is to modify all calls to
+          <code>write()</code> so that requests to write more than
+          4096 bytes are treated as if they only requested a write of
+          4096 bytes.  Valgrind does this by changing the
+          <code>count</code> argument of <code>write()</code>, as
+          passed to the kernel, so that it is at most 4096.  The
+          amount of data written will then be less than the client
+          program asked for, but the client should have a loop around
+          its <code>write()</code> call to check whether the requested
+          number of bytes have been written.  If not, it should issue
+          further <code>write()</code> calls until all the data is
+          written.
+          <p>
+          This all sounds pretty dodgy to me, which is why I've made
+          this behaviour only happen on request.  It is not the
+          default behaviour.  At the time of writing this (30 June
+          2002) I have only seen one example where this is necessary,
+          so either the problem is extremely rare or nobody is using
+          Valgrind :-)
+          <p>
+          On experimentation I see that <code>truncate-writes</code>
+          doesn't interact well with <code>ioctl-VTIME</code>, so you
+          probably don't want to try both at once.
+          <p>
+          As above, to find out if your program is blocking
+          unexpectedly in the <code>write()</code> system call, you
+          may find the <code>--trace-syscalls=yes
+          --trace-sched=yes</code> flags useful.
        </ul>
  
        </li><p>
diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html

index ba6ef5c4189f7f86cc57d38dcffc6cc7e5b35438..b86962c0394abbaec24f3c7bdfdfe290183adae0 100644 (file)
--- a/coregrind/docs/manual.html
+++ b/coregrind/docs/manual.html
@@ -619,6 +619,37 @@ follows:
            To find out if your program is blocking unexpectedly in the
            <code>read</code> system call, run with
            <code>--trace-syscalls=yes</code> flag.
+      <p>
+      <li><code>truncate-writes</code> Use this if you have a threaded
+          program which appears to unexpectedly block whilst writing
+          into a pipe.  The effect is to modify all calls to
+          <code>write()</code> so that requests to write more than
+          4096 bytes are treated as if they only requested a write of
+          4096 bytes.  Valgrind does this by changing the
+          <code>count</code> argument of <code>write()</code>, as
+          passed to the kernel, so that it is at most 4096.  The
+          amount of data written will then be less than the client
+          program asked for, but the client should have a loop around
+          its <code>write()</code> call to check whether the requested
+          number of bytes have been written.  If not, it should issue
+          further <code>write()</code> calls until all the data is
+          written.
+          <p>
+          This all sounds pretty dodgy to me, which is why I've made
+          this behaviour only happen on request.  It is not the
+          default behaviour.  At the time of writing this (30 June
+          2002) I have only seen one example where this is necessary,
+          so either the problem is extremely rare or nobody is using
+          Valgrind :-)
+          <p>
+          On experimentation I see that <code>truncate-writes</code>
+          doesn't interact well with <code>ioctl-VTIME</code>, so you
+          probably don't want to try both at once.
+          <p>
+          As above, to find out if your program is blocking
+          unexpectedly in the <code>write()</code> system call, you
+          may find the <code>--trace-syscalls=yes
+          --trace-sched=yes</code> flags useful.
        </ul>
  
        </li><p>
diff --git a/coregrind/valgrind.in b/coregrind/valgrind.in

index fe31ce6bbc6a1f4448a8df2e527b220627599838..7b99277254c15c5c5c98d9ada50d8519487a6e3e 100755 (executable)
--- a/coregrind/valgrind.in
+++ b/coregrind/valgrind.in
@@ -145,7 +145,7 @@ if [ $# = 0 ] || [ z"$dousage" = z1 ]; then
     echo "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually"
     echo "    --L2=<size>,<assoc>,<line_size>  set L2 cache manually"
     echo "    --weird-hacks=hack1,hack2,...  [no hacks selected]"
-   echo "         recognised hacks are: ioctl-VTIME"
+   echo "         recognised hacks are: ioctl-VTIME truncate-writes"
     echo ""
     echo
     echo "  options for debugging Valgrind itself are:"
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c

index 076b160ef562289c9dbf7a810936433bb36ad281..8881f9ae64c5312ad5c40f077aec1aa1ab0f39dc 100644 (file)
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -1001,7 +1001,7 @@ void poll_for_ready_fds ( void )
           VG_(printf)("offending fd = %d\n", fd);
           VG_(panic)("poll_for_ready_fds: multiple events on fd");
        }
-      
+
        /* An I/O event completed for fd.  Find the thread which
           requested this. */
        for (i = 0; i < VG_N_WAITING_FDS; i++) {
@@ -1050,9 +1050,25 @@ void complete_blocked_syscalls ( void )
           number, because the speculative call made by
           sched_do_syscall() doesn't change %EAX in the case where the
           call would have blocked. */
-
        syscall_no = vg_waiting_fds[i].syscall_no;
        vg_assert(syscall_no == VG_(threads)[tid].m_eax);
+
+      /* In a rare case pertaining to writing into a pipe, write()
+         will block when asked to write > 4096 bytes even though the
+         kernel claims, when asked via select(), that blocking will
+         not occur for a write on that fd.  This can cause deadlocks.
+         An easy answer is to limit the size of the write to 4096
+         anyway and hope that the client program's logic can handle
+         the short write.  That sounds dubious to me, so we don't do
+         it by default. */
+      if (syscall_no == __NR_write 
+          && VG_(threads)[tid].m_edx /* arg3, count */ > 4096
+          && VG_(strstr)(VG_(clo_weird_hacks), "truncate-writes") != NULL) {
+         /* VG_(printf)("truncate write from %d to 4096\n", 
+            VG_(threads)[tid].m_edx ); */
+         VG_(threads)[tid].m_edx = 4096;
+      }
+
        KERNEL_DO_SYSCALL(tid,res);
        VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
  
diff --git a/docs/manual.html b/docs/manual.html

index ba6ef5c4189f7f86cc57d38dcffc6cc7e5b35438..b86962c0394abbaec24f3c7bdfdfe290183adae0 100644 (file)
--- a/docs/manual.html
+++ b/docs/manual.html
@@ -619,6 +619,37 @@ follows:
            To find out if your program is blocking unexpectedly in the
            <code>read</code> system call, run with
            <code>--trace-syscalls=yes</code> flag.
+      <p>
+      <li><code>truncate-writes</code> Use this if you have a threaded
+          program which appears to unexpectedly block whilst writing
+          into a pipe.  The effect is to modify all calls to
+          <code>write()</code> so that requests to write more than
+          4096 bytes are treated as if they only requested a write of
+          4096 bytes.  Valgrind does this by changing the
+          <code>count</code> argument of <code>write()</code>, as
+          passed to the kernel, so that it is at most 4096.  The
+          amount of data written will then be less than the client
+          program asked for, but the client should have a loop around
+          its <code>write()</code> call to check whether the requested
+          number of bytes have been written.  If not, it should issue
+          further <code>write()</code> calls until all the data is
+          written.
+          <p>
+          This all sounds pretty dodgy to me, which is why I've made
+          this behaviour only happen on request.  It is not the
+          default behaviour.  At the time of writing this (30 June
+          2002) I have only seen one example where this is necessary,
+          so either the problem is extremely rare or nobody is using
+          Valgrind :-)
+          <p>
+          On experimentation I see that <code>truncate-writes</code>
+          doesn't interact well with <code>ioctl-VTIME</code>, so you
+          probably don't want to try both at once.
+          <p>
+          As above, to find out if your program is blocking
+          unexpectedly in the <code>write()</code> system call, you
+          may find the <code>--trace-syscalls=yes
+          --trace-sched=yes</code> flags useful.
        </ul>
  
        </li><p>
diff --git a/memcheck/docs/manual.html b/memcheck/docs/manual.html

index ba6ef5c4189f7f86cc57d38dcffc6cc7e5b35438..b86962c0394abbaec24f3c7bdfdfe290183adae0 100644 (file)
--- a/memcheck/docs/manual.html
+++ b/memcheck/docs/manual.html
@@ -619,6 +619,37 @@ follows:
            To find out if your program is blocking unexpectedly in the
            <code>read</code> system call, run with
            <code>--trace-syscalls=yes</code> flag.
+      <p>
+      <li><code>truncate-writes</code> Use this if you have a threaded
+          program which appears to unexpectedly block whilst writing
+          into a pipe.  The effect is to modify all calls to
+          <code>write()</code> so that requests to write more than
+          4096 bytes are treated as if they only requested a write of
+          4096 bytes.  Valgrind does this by changing the
+          <code>count</code> argument of <code>write()</code>, as
+          passed to the kernel, so that it is at most 4096.  The
+          amount of data written will then be less than the client
+          program asked for, but the client should have a loop around
+          its <code>write()</code> call to check whether the requested
+          number of bytes have been written.  If not, it should issue
+          further <code>write()</code> calls until all the data is
+          written.
+          <p>
+          This all sounds pretty dodgy to me, which is why I've made
+          this behaviour only happen on request.  It is not the
+          default behaviour.  At the time of writing this (30 June
+          2002) I have only seen one example where this is necessary,
+          so either the problem is extremely rare or nobody is using
+          Valgrind :-)
+          <p>
+          On experimentation I see that <code>truncate-writes</code>
+          doesn't interact well with <code>ioctl-VTIME</code>, so you
+          probably don't want to try both at once.
+          <p>
+          As above, to find out if your program is blocking
+          unexpectedly in the <code>write()</code> system call, you
+          may find the <code>--trace-syscalls=yes
+          --trace-sched=yes</code> flags useful.
        </ul>
  
        </li><p>
diff --git a/valgrind.in b/valgrind.in

index fe31ce6bbc6a1f4448a8df2e527b220627599838..7b99277254c15c5c5c98d9ada50d8519487a6e3e 100755 (executable)
--- a/valgrind.in
+++ b/valgrind.in
@@ -145,7 +145,7 @@ if [ $# = 0 ] || [ z"$dousage" = z1 ]; then
     echo "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually"
     echo "    --L2=<size>,<assoc>,<line_size>  set L2 cache manually"
     echo "    --weird-hacks=hack1,hack2,...  [no hacks selected]"
-   echo "         recognised hacks are: ioctl-VTIME"
+   echo "         recognised hacks are: ioctl-VTIME truncate-writes"
     echo ""
     echo
     echo "  options for debugging Valgrind itself are:"
diff --git a/vg_scheduler.c b/vg_scheduler.c

index 076b160ef562289c9dbf7a810936433bb36ad281..8881f9ae64c5312ad5c40f077aec1aa1ab0f39dc 100644 (file)
--- a/vg_scheduler.c
+++ b/vg_scheduler.c
@@ -1001,7 +1001,7 @@ void poll_for_ready_fds ( void )
           VG_(printf)("offending fd = %d\n", fd);
           VG_(panic)("poll_for_ready_fds: multiple events on fd");
        }
-      
+
        /* An I/O event completed for fd.  Find the thread which
           requested this. */
        for (i = 0; i < VG_N_WAITING_FDS; i++) {
@@ -1050,9 +1050,25 @@ void complete_blocked_syscalls ( void )
           number, because the speculative call made by
           sched_do_syscall() doesn't change %EAX in the case where the
           call would have blocked. */
-
        syscall_no = vg_waiting_fds[i].syscall_no;
        vg_assert(syscall_no == VG_(threads)[tid].m_eax);
+
+      /* In a rare case pertaining to writing into a pipe, write()
+         will block when asked to write > 4096 bytes even though the
+         kernel claims, when asked via select(), that blocking will
+         not occur for a write on that fd.  This can cause deadlocks.
+         An easy answer is to limit the size of the write to 4096
+         anyway and hope that the client program's logic can handle
+         the short write.  That sounds dubious to me, so we don't do
+         it by default. */
+      if (syscall_no == __NR_write 
+          && VG_(threads)[tid].m_edx /* arg3, count */ > 4096
+          && VG_(strstr)(VG_(clo_weird_hacks), "truncate-writes") != NULL) {
+         /* VG_(printf)("truncate write from %d to 4096\n", 
+            VG_(threads)[tid].m_edx ); */
+         VG_(threads)[tid].m_edx = 4096;
+      }
+
        KERNEL_DO_SYSCALL(tid,res);
        VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
author	Julian Seward <jseward@acm.org>
	Sun, 30 Jun 2002 12:44:54 +0000 (12:44 +0000)
committer	Julian Seward <jseward@acm.org>
	Sun, 30 Jun 2002 12:44:54 +0000 (12:44 +0000)
cachegrind/docs/manual.html		patch \| blob \| blame \| history
coregrind/docs/manual.html		patch \| blob \| blame \| history
coregrind/valgrind.in		patch \| blob \| blame \| history
coregrind/vg_scheduler.c		patch \| blob \| blame \| history
docs/manual.html		patch \| blob \| blame \| history
memcheck/docs/manual.html		patch \| blob \| blame \| history
valgrind.in		patch \| blob \| blame \| history
vg_scheduler.c		patch \| blob \| blame \| history