From: Tom Hughes
Date: Thu, 15 Jul 2004 23:13:37 +0000 (+0000)
Subject: Implement support for the async I/O system calls in 2.6 kernels. This
X-Git-Tag: svn/VALGRIND_2_1_2~22
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=61fa07e868c634e67c20a4e46ea05e3c76c4c490;p=thirdparty%2Fvalgrind.git

Implement support for the async I/O system calls in 2.6 kernels. This
requires padding of the address space around calls to io_setup in order to
constrain the kernel's choice of address for the I/O context.

Based on patch from Scott Smith with various enhancements, this fixes bug #83060.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2485
---

diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 80fd3bce48..c23f06844a 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -1604,6 +1604,11 @@ extern Segment *VG_(next_segment)(Segment *);
 extern Bool VG_(seg_contains)(const Segment *s, Addr ptr, UInt size);
 extern Bool VG_(seg_overlaps)(const Segment *s, Addr ptr, UInt size);
 
+/* Fill / release every hole between the known segments; used around
+   io_setup to constrain the kernel's choice of mapping address. */
+extern void VG_(pad_address_space)(void);
+extern void VG_(unpad_address_space)(void);
+
 extern __attribute__((regparm(1))) 
        void VG_(unknown_esp_update) ( Addr new_ESP );
 
diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c
index 4a476f7584..06586e4460 100644
--- a/coregrind/vg_memory.c
+++ b/coregrind/vg_memory.c
@@ -545,6 +545,76 @@ Addr VG_(find_map_space)(Addr addr, UInt len, Bool for_client)
    return ret;
 }
 
+/* Fill every hole between VG_(client_base) and VG_(valgrind_end) that is
+   not covered by a Segment in sk_segments with a fixed, private, anonymous
+   mapping of protection 0.  io_setup brackets the real system call with
+   this and VG_(unpad_address_space) so that the kernel's choice of address
+   for the I/O context is constrained.  The mmap results are not checked. */
+void VG_(pad_address_space)(void)
+{
+   Addr addr = VG_(client_base);
+   Segment *s = VG_(SkipNode_First)(&sk_segments);
+   UInt args[6];
+   Addr ret;
+
+   /* __NR_mmap is handed a pointer to its argument block: args[0] and
+      args[1] (start, length) are set per hole; the rest never change. */
+   args[2] = 0;
+   args[3] = VKI_MAP_FIXED | VKI_MAP_PRIVATE | VKI_MAP_ANONYMOUS;
+   args[4] = -1;
+   args[5] = 0;
+
+   while (s && addr < VG_(valgrind_end)) {
+      if (addr < s->addr) {
+         /* Hole in front of this segment. */
+         args[0] = (UInt)addr;
+         args[1] = s->addr - addr;
+
+         ret = VG_(do_syscall)(__NR_mmap, (UInt)args);
+      }
+
+      addr = s->addr + s->len;
+      s = VG_(SkipNode_Next)(&sk_segments, s);
+   }
+
+   /* Hole between the last segment and VG_(valgrind_end). */
+   if (addr < VG_(valgrind_end)) {
+      args[0] = (UInt)addr;
+      args[1] = VG_(valgrind_end) - addr;
+
+      ret = VG_(do_syscall)(__NR_mmap, (UInt)args);
+   }
+
+   return;
+}
+
+/* Undo VG_(pad_address_space): walk sk_segments again and munmap every
+   hole between VG_(client_base) and VG_(valgrind_end).  The holes are
+   recomputed rather than remembered, so the segment list must not have
+   changed since the padding was applied.  The munmap results are not
+   checked. */
+void VG_(unpad_address_space)(void)
+{
+   Addr addr = VG_(client_base);
+   Segment *s = VG_(SkipNode_First)(&sk_segments);
+   Int ret;
+
+   while (s && addr < VG_(valgrind_end)) {
+      if (addr < s->addr) {
+         ret = VG_(do_syscall)(__NR_munmap, (UInt)addr, s->addr - addr);
+      }
+
+      addr = s->addr + s->len;
+      s = VG_(SkipNode_Next)(&sk_segments, s);
+   }
+
+   if (addr < VG_(valgrind_end)) {
+      ret = VG_(do_syscall)(__NR_munmap, (UInt)addr, VG_(valgrind_end) - addr);
+   }
+
+   return;
+}
+
 Segment *VG_(find_segment)(Addr a)
 {
    return VG_(SkipList_Find)(&sk_segments, &a);
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
index ce6897d16a..62ad541d7b 100644
--- a/coregrind/vg_syscalls.c
+++ b/coregrind/vg_syscalls.c
@@ -5358,6 +5358,169 @@ POST(sigpending)
 PREALIAS(rt_sigpending, sigpending);
 POSTALIAS(rt_sigpending, sigpending);
 
+/* io_setup: the kernel picks the address of the AIO ring itself.  Reserve
+   a client range of the right size, pad every other hole so that the
+   kernel's choice is constrained, make the call, then remove the padding.
+   The system call is made here in the PRE handler. */
+PRE(io_setup)
+{
+   UInt size;
+   Addr addr;
+
+   /* long io_setup (unsigned nr_events, aio_context_t *ctxp); */
+   MAYBE_PRINTF("io_setup ( %u, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "io_setup(ctxp)",
+                  arg2, sizeof(vki_aio_context_t) );
+
+   /* Ring header plus one event slot per requested event, page rounded. */
+   size = PGROUNDUP(sizeof(vki_aio_ring) + arg1 * sizeof(vki_io_event));
+   addr = VG_(find_map_space)(0, size, True);
+   VG_(map_segment)(addr, size, VKI_PROT_READ|VKI_PROT_EXEC, SF_FIXED);
+
+   VG_(pad_address_space)();
+   res = VG_(do_syscall)(SYSNO, arg1, arg2);
+   VG_(unpad_address_space)();
+
+   if (res == 0) {
+      /* *ctxp now holds the ring address, which must be the range
+         reserved above. */
+      vki_aio_ring *r = *(vki_aio_ring **)arg2;
+
+      vg_assert(addr == (Addr)r);
+      vg_assert(valid_client_addr(addr, size, tid, "io_setup"));
+
+      VG_TRACK( new_mem_mmap, addr, size, True, True, False );
+      VG_TRACK( post_mem_write, arg2, sizeof(vki_aio_context_t) );
+   }
+   else {
+      /* The call failed, so give the reservation back. */
+      VG_(unmap_range)(addr, size);
+   }
+}
+
+/* io_destroy: when the call succeeds the ring mapping is treated as gone,
+   so it is reported to the tool and removed from the segment list.  The
+   system call is made here in the PRE handler. */
+PRE(io_destroy)
+{
+   Segment *s = VG_(find_segment)(arg1);
+   UInt size = 0;
+
+   /* long io_destroy (aio_context_t ctx); */
+   MAYBE_PRINTF("io_destroy ( %u )\n",arg1);
+
+   /* The context value is itself the address of the ring (io_setup asserts
+      that *ctxp equals the mapped address), so cast arg1 directly instead
+      of dereferencing it as a pointer to a pointer.  The ring is read
+      before the call, and only if its header lies inside a known segment,
+      so that a bogus context is never dereferenced. */
+   if (s != NULL && VG_(seg_contains)(s, arg1, sizeof(vki_aio_ring))) {
+      vki_aio_ring *r = (vki_aio_ring *)arg1;
+
+      size = PGROUNDUP(sizeof(vki_aio_ring) + r->nr * sizeof(vki_io_event));
+   }
+
+   res = VG_(do_syscall)(SYSNO, arg1);
+
+   if (res == 0 && s != NULL && size != 0 && VG_(seg_contains)(s, arg1, size)) {
+      VG_TRACK( die_mem_munmap, arg1, size );
+      VG_(unmap_range)(arg1, size);
+   }
+}
+
+/* io_getevents: the events array will be written by the kernel and the
+   optional timeout is read by it. */
+PRE(io_getevents)
+{
+   /* long io_getevents (aio_context_t ctx_id, long min_nr, long nr,
+                         struct io_event *events, struct timespec *timeout); */
+   MAYBE_PRINTF("io_getevents ( %u, %d, %d, %p, %p )\n",arg1,arg2,arg3,arg4,arg5);
+   if (arg3 > 0)
+      SYSCALL_TRACK( pre_mem_write, tid, "io_getevents(events)",
+                     arg4, sizeof(vki_io_event)*arg3 );
+   if (arg5 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_read, tid, "io_getevents(timeout)",
+                     arg5, sizeof(struct timespec));
+}
+
+/* res events were returned: mark them written and, for each completed
+   read, mark the bytes that landed in the request's buffer as written. */
+POST(io_getevents)
+{
+   int i;
+
+   if (res > 0) {
+      VG_TRACK( post_mem_write, arg4, sizeof(vki_io_event)*res );
+      for (i = 0; i < res; i++) {
+         const vki_io_event *vev = ((vki_io_event *)arg4) + i;
+         const vki_iocb *cb = (vki_iocb *)(UInt)vev->obj;
+
+         switch (cb->aio_lio_opcode) {
+         case VKI_IOCB_CMD_PREAD:
+            if (vev->result > 0)
+               VG_TRACK( post_mem_write, cb->aio_buf, vev->result );
+            break;
+
+         case VKI_IOCB_CMD_PWRITE:
+            break;
+
+         default:
+            VG_(message)(Vg_DebugMsg,"Warning: unhandled io_getevents opcode: %u\n",cb->aio_lio_opcode);
+            break;
+         }
+      }
+   }
+}
+
+/* io_submit: the pointer array and each iocb are read by the kernel; the
+   data buffer of each request is checked as writable for a read request
+   and as readable for a write request. */
+PRE(io_submit)
+{
+   int i;
+
+   /* long io_submit (aio_context_t ctx_id, long nr, struct iocb **iocbpp); */
+   MAYBE_PRINTF("io_submit( %u, %d, %p )\n",arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_read, tid, "io_submit(iocbpp)",
+                  arg3, sizeof(vki_iocb *)*arg2 );
+   for (i = 0; i < arg2; i++) {
+      vki_iocb *cb = ((vki_iocb **)arg3)[i];
+      SYSCALL_TRACK( pre_mem_read, tid, "io_submit(iocb)",
+                     (UInt)cb, sizeof(vki_iocb) );
+      switch (cb->aio_lio_opcode) {
+      case VKI_IOCB_CMD_PREAD:
+         SYSCALL_TRACK( pre_mem_write, tid, "io_submit(PREAD)",
+                        cb->aio_buf, cb->aio_nbytes );
+         break;
+
+      case VKI_IOCB_CMD_PWRITE:
+         SYSCALL_TRACK( pre_mem_read, tid, "io_submit(PWRITE)",
+                        cb->aio_buf, cb->aio_nbytes );
+         break;
+
+      default:
+         VG_(message)(Vg_DebugMsg,"Warning: unhandled io_submit opcode: %u\n",cb->aio_lio_opcode);
+         break;
+      }
+   }
+}
+
+/* io_cancel: the iocb is read and the result event is written. */
+PRE(io_cancel)
+{
+   /* long io_cancel (aio_context_t ctx_id, struct iocb *iocb,
+                      struct io_event *result); */
+   MAYBE_PRINTF("io_cancel( %u, %p, %p )\n",arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_read, tid, "io_cancel(iocb)",
+                  arg2, sizeof(vki_iocb) );
+   SYSCALL_TRACK( pre_mem_write, tid, "io_cancel(result)",
+                  arg3, sizeof(vki_io_event) );
+}
+
+POST(io_cancel)
+{
+   VG_TRACK( post_mem_write, arg3, sizeof(vki_io_event) );
+}
 
 #undef SYSNO
 #undef res
@@ -5414,6 +5577,9 @@ static const struct sys_info special_sys[] = {
    SYSB_(mmap,			False),
    SYSB_(mremap,		False),
 
+   SYSB_(io_setup,		False),
+   SYSB_(io_destroy,		False),
+
 #if SIGNAL_SIMULATION
    SYSBA(sigaltstack,		False),
    SYSBA(rt_sigaction,		False),
@@ -5626,6 +5792,10 @@ static const struct sys_info sys_info[] = {
    SYSBA(setitimer,		True), /* not blocking, but must run in LWP context */
    SYSBA(getitimer,		True), /* not blocking, but must run in LWP context */
 
+   SYSBA(io_getevents,		True),
+   SYSB_(io_submit,		False),
+   SYSBA(io_cancel,		False),
+
 #if !SIGNAL_SIMULATION
    SYSBA(sigaltstack,		False),
    SYSBA(rt_sigaction,		False),