1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*--------------------------------------------------------------------*/
4 /*--- Wrappers for generic Unix system calls ---*/
5 /*--- syswrap-generic.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
11
12 Copyright (C) 2000-2017 Julian Seward
13 jseward@acm.org
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
32
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35 #include "pub_core_vkiscnums.h"
36 #include "pub_core_threadstate.h"
37 #include "pub_core_debuginfo.h" // VG_(di_notify_*)
38 #include "pub_core_aspacemgr.h"
39 #include "pub_core_transtab.h" // VG_(discard_translations)
40 #include "pub_core_xarray.h"
41 #include "pub_core_clientstate.h" // VG_(brk_base), VG_(brk_limit)
42 #include "pub_core_debuglog.h"
43 #include "pub_core_errormgr.h" // For VG_(maybe_record_error)
44 #include "pub_core_gdbserver.h" // VG_(gdbserver)
45 #include "pub_core_libcbase.h"
46 #include "pub_core_libcassert.h"
47 #include "pub_core_libcfile.h"
48 #include "pub_core_libcprint.h"
49 #include "pub_core_libcproc.h"
50 #include "pub_core_libcsignal.h"
51 #include "pub_core_machine.h" // VG_(get_SP)
52 #include "pub_core_mallocfree.h"
53 #include "pub_core_options.h"
54 #include "pub_core_scheduler.h"
55 #include "pub_core_signals.h"
56 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
57 #include "pub_core_syscall.h"
58 #include "pub_core_syswrap.h"
59 #include "pub_core_tooliface.h"
60 #include "pub_core_ume.h"
61 #include "pub_core_stacks.h"
62
63 #include "priv_types_n_macros.h"
64 #include "priv_syswrap-generic.h"
65
66 #include "config.h"
67
68 static
69 HChar *getsockdetails(Int fd, UInt len, HChar *buf);
70
71 void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
72 {
73 Bool debug = False;
74 NSegment const* seg;
75
76 /* We don't really know where the client stack is, because it's
77 allocated by the client. The best we can do is look at the
78 memory mappings and try to derive some useful information. We
79 assume that sp starts near its highest possible value, and can
80 only go down to the start of the mmaped segment. */
81 seg = VG_(am_find_nsegment)(sp);
82 if (seg
83 && VG_(am_is_valid_for_client)(sp, 1, VKI_PROT_READ | VKI_PROT_WRITE)) {
84 tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
85 tst->client_stack_szB = tst->client_stack_highest_byte - seg->start + 1;
86
87 tst->os_state.stk_id
88 = VG_(register_stack)(seg->start, tst->client_stack_highest_byte);
89
90 if (debug)
91 VG_(printf)("tid %u: guessed client stack range [%#lx-%#lx]"
92 " as stk_id %lu\n",
93 tst->tid, seg->start, tst->client_stack_highest_byte,
94 tst->os_state.stk_id);
95 } else {
96 VG_(message)(Vg_UserMsg,
97 "!? New thread %u starts with SP(%#lx) unmapped\n",
98 tst->tid, sp);
99 tst->client_stack_highest_byte = 0;
100 tst->client_stack_szB = 0;
101 }
102 }
103
104 /* Returns True iff address range is something the client can
105 plausibly mess with: all of it either already belongs to the
106 client or is free or a reservation. */
107
108 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
109 const HChar *syscallname)
110 {
111 Bool ret;
112
113 if (size == 0)
114 return True;
115
116 ret = VG_(am_is_valid_for_client_or_free_or_resvn)
117 (start,size,VKI_PROT_NONE);
118
119 if (0)
120 VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
121 syscallname, start, start+size-1, (Int)ret);
122
123 if (!ret && syscallname != NULL) {
124 VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
125 "to modify addresses %#lx-%#lx\n",
126 syscallname, start, start+size-1);
127 if (VG_(clo_verbosity) > 1) {
128 VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
129 }
130 }
131
132 return ret;
133 }
134
135
136 Bool ML_(client_signal_OK)(Int sigNo)
137 {
138 /* signal 0 is OK for kill */
139 Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
140
141 //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
142
143 return ret;
144 }
145
146
147 /* Handy small function to help stop wrappers from segfaulting when
148 presented with bogus client addresses. Is not used for generating
149 user-visible errors. */
150
151 Bool ML_(safe_to_deref) ( const void *start, SizeT size )
152 {
153 return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
154 }
155
156
157 /* ---------------------------------------------------------------------
158 Doing mmap, mremap
159 ------------------------------------------------------------------ */
160
161 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
162 munmap, mprotect (and mremap??) work at the page level. So addresses
163 and lengths must be adjusted for this. */
164
165 /* Mash around start and length so that the area exactly covers
166 an integral number of pages. If we don't do that, memcheck's
167 idea of addressable memory diverges from that of the
168 kernel, which causes the leak detector to crash. */
169 static
170 void page_align_addr_and_len( Addr* a, SizeT* len)
171 {
172 Addr ra;
173
174 ra = VG_PGROUNDDN(*a);
175 *len = VG_PGROUNDUP(*a + *len) - ra;
176 *a = ra;
177 }
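/* An illustrative sketch (not part of Valgrind, compiled out): assuming a
   4096-byte page size, an unaligned request is widened to the smallest
   page-aligned range that covers it.  The function name is hypothetical. */
#if 0
static void page_align_example ( void )
{
   Addr  a   = 0x100123;   /* somewhere inside a page  */
   SizeT len = 10;         /* tiny, unaligned length   */
   page_align_addr_and_len(&a, &len);
   /* Now a == 0x100000 and len == 0x1000: the whole page containing the
      original range, and nothing more. */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   vg_assert(VG_IS_PAGE_ALIGNED(len));
}
#endif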
178
179 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
180 UInt flags, Int fd, Off64T offset)
181 {
182 Bool d;
183
184 /* 'a' is the return value from a real kernel mmap, hence: */
185 vg_assert(VG_IS_PAGE_ALIGNED(a));
186 /* whereas len is whatever the syscall supplied. So: */
187 len = VG_PGROUNDUP(len);
188
189 d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
190
191 if (d)
192 VG_(discard_translations)( a, (ULong)len,
193 "notify_core_of_mmap" );
194 }
195
196 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
197 {
198 Bool rr, ww, xx;
199
200 /* 'a' is the return value from a real kernel mmap, hence: */
201 vg_assert(VG_IS_PAGE_ALIGNED(a));
202 /* whereas len is whatever the syscall supplied. So: */
203 len = VG_PGROUNDUP(len);
204
205 rr = toBool(prot & VKI_PROT_READ);
206 ww = toBool(prot & VKI_PROT_WRITE);
207 xx = toBool(prot & VKI_PROT_EXEC);
208
209 VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
210 }
211
212
213 /* When a client mmap has been successfully done, this function must
214 be called. It notifies both aspacem and the tool of the new
215 mapping.
216
217 JRS 2008-Aug-14: But notice this is *very* obscure. The only place
218 it is called from is POST(sys_io_setup). In particular,
219 ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
220 client mmap. But it doesn't call this function; instead it does the
221 relevant notifications itself. Here, we just pass di_handle=0 to
222 notify_tool_of_mmap as we have no better information. But really this
223 function should be done away with; problem is I don't understand what
224 POST(sys_io_setup) does or how it works.
225
226 [However, this function is used lots for Darwin, because
227 ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
228 */
229 void
230 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
231 UInt flags, Int fd, Off64T offset )
232 {
233 // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
234 // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
235 // Should it? --njn
236 notify_core_of_mmap(a, len, prot, flags, fd, offset);
237 notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
238 }
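/* An illustrative sketch (not part of Valgrind, compiled out) of the
   intended call pattern: only once the kernel has actually accepted the
   mapping do we tell aspacem and the tool about it.  The wrapper name and
   parameters here are hypothetical. */
#if 0
static void example_post_mmap ( SysRes sres, SizeT len, UInt prot,
                                UInt flags, Int fd, Off64T offset )
{
   if (!sr_isError(sres)) {
      Addr a = sr_Res(sres);   /* kernel-chosen, page-aligned address */
      ML_(notify_core_and_tool_of_mmap)(a, len, prot, flags, fd, offset);
   }
}
#endif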
239
240 void
241 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
242 {
243 Bool d;
244
245 page_align_addr_and_len(&a, &len);
246 d = VG_(am_notify_munmap)(a, len);
247 VG_TRACK( die_mem_munmap, a, len );
248 VG_(di_notify_munmap)( a, len );
249 if (d)
250 VG_(discard_translations)( a, (ULong)len,
251 "ML_(notify_core_and_tool_of_munmap)" );
252 }
253
254 void
255 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
256 {
257 Bool rr = toBool(prot & VKI_PROT_READ);
258 Bool ww = toBool(prot & VKI_PROT_WRITE);
259 Bool xx = toBool(prot & VKI_PROT_EXEC);
260 Bool d;
261
262 page_align_addr_and_len(&a, &len);
263 d = VG_(am_notify_mprotect)(a, len, prot);
264 VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
265 VG_(di_notify_mprotect)( a, len, prot );
266 if (d)
267 VG_(discard_translations)( a, (ULong)len,
268 "ML_(notify_core_and_tool_of_mprotect)" );
269 }
270
271
272
273 #if HAVE_MREMAP
274 /* Expand (or shrink) an existing mapping, potentially moving it at
275 the same time (controlled by the MREMAP_MAYMOVE flag). Nightmare.
276 */
277 static
278 SysRes do_mremap( Addr old_addr, SizeT old_len,
279 Addr new_addr, SizeT new_len,
280 UWord flags, ThreadId tid )
281 {
282 # define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
283
284 Bool ok, d;
285 NSegment const* old_seg;
286 Addr advised;
287 Bool f_fixed = toBool(flags & VKI_MREMAP_FIXED);
288 Bool f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
289
290 if (0)
291 VG_(printf)("do_remap (old %#lx %lu) (new %#lx %lu) %s %s\n",
292 old_addr,old_len,new_addr,new_len,
293 flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
294 flags & VKI_MREMAP_FIXED ? "FIXED" : "");
295 if (0)
296 VG_(am_show_nsegments)(0, "do_remap: before");
297
298 if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
299 goto eINVAL;
300
301 if (!VG_IS_PAGE_ALIGNED(old_addr))
302 goto eINVAL;
303
304 old_len = VG_PGROUNDUP(old_len);
305 new_len = VG_PGROUNDUP(new_len);
306
307 if (new_len == 0)
308 goto eINVAL;
309
310 /* kernel doesn't reject this, but we do. */
311 if (old_len == 0)
312 goto eINVAL;
313
314 /* reject wraparounds */
315 if (old_addr + old_len < old_addr)
316 goto eINVAL;
317 if (f_fixed == True && new_addr + new_len < new_len)
318 goto eINVAL;
319
320 /* kernel rejects all fixed, no-move requests (which are
321 meaningless). */
322 if (f_fixed == True && f_maymove == False)
323 goto eINVAL;
324
325 /* Stay away from non-client areas. */
326 if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
327 goto eINVAL;
328
329 /* In all remaining cases, if the old range does not fall within a
330 single segment, fail. */
331 old_seg = VG_(am_find_nsegment)( old_addr );
332 if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
333 goto eINVAL;
334 if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC
335 && old_seg->kind != SkShmC)
336 goto eINVAL;
337
338 vg_assert(old_len > 0);
339 vg_assert(new_len > 0);
340 vg_assert(VG_IS_PAGE_ALIGNED(old_len));
341 vg_assert(VG_IS_PAGE_ALIGNED(new_len));
342 vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
343
344 /* There are 3 remaining cases:
345
346 * maymove == False
347
348 new space has to be at old address, so:
349 - shrink -> unmap end
350 - same size -> do nothing
351 - grow -> if can grow in-place, do so, else fail
352
353 * maymove == True, fixed == False
354
355 new space can be anywhere, so:
356 - shrink -> unmap end
357 - same size -> do nothing
358 - grow -> if can grow in-place, do so, else
359 move to anywhere large enough, else fail
360
361 * maymove == True, fixed == True
362
363 new space must be at new address, so:
364
365 - if new address is not page aligned, fail
366 - if new address range overlaps old one, fail
367 - if new address range cannot be allocated, fail
368 - else move to new address range with new size
369 - else fail
370 */
371
372 if (f_maymove == False) {
373 /* new space has to be at old address */
374 if (new_len < old_len)
375 goto shrink_in_place;
376 if (new_len > old_len)
377 goto grow_in_place_or_fail;
378 goto same_in_place;
379 }
380
381 if (f_maymove == True && f_fixed == False) {
382 /* new space can be anywhere */
383 if (new_len < old_len)
384 goto shrink_in_place;
385 if (new_len > old_len)
386 goto grow_in_place_or_move_anywhere_or_fail;
387 goto same_in_place;
388 }
389
390 if (f_maymove == True && f_fixed == True) {
391 /* new space can only be at the new address */
392 if (!VG_IS_PAGE_ALIGNED(new_addr))
393 goto eINVAL;
394 if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
395 /* no overlap */
396 } else {
397 goto eINVAL;
398 }
399 if (new_addr == 0)
400 goto eINVAL;
401 /* VG_(am_get_advisory_client_simple) interprets zero to mean
402 non-fixed, which is not what we want */
403 advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
404 if (!ok || advised != new_addr)
405 goto eNOMEM;
406 ok = VG_(am_relocate_nooverlap_client)
407 ( &d, old_addr, old_len, new_addr, new_len );
408 if (ok) {
409 VG_TRACK( copy_mem_remap, old_addr, new_addr,
410 MIN_SIZET(old_len,new_len) );
411 if (new_len > old_len)
412 VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
413 old_seg->hasR, old_seg->hasW, old_seg->hasX,
414 0/*di_handle*/ );
415 VG_TRACK(die_mem_munmap, old_addr, old_len);
416 if (d) {
417 VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
418 VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
419 }
420 return VG_(mk_SysRes_Success)( new_addr );
421 }
422 goto eNOMEM;
423 }
424
425 /* end of the 3 cases */
426 /*NOTREACHED*/ vg_assert(0);
427
428 grow_in_place_or_move_anywhere_or_fail:
429 {
430 /* try growing it in-place */
431 Addr needA = old_addr + old_len;
432 SSizeT needL = new_len - old_len;
433
434 vg_assert(needL > 0);
435 vg_assert(needA > 0);
436
437 advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
438 if (ok) {
439 /* Fixes bug #129866. */
440 ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
441 }
442 if (ok && advised == needA) {
443 const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
444 if (new_seg) {
445 VG_TRACK( new_mem_mmap, needA, needL,
446 new_seg->hasR,
447 new_seg->hasW, new_seg->hasX,
448 0/*di_handle*/ );
449 return VG_(mk_SysRes_Success)( old_addr );
450 }
451 }
452
453 /* that failed. Look elsewhere. */
454 advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
455 if (ok) {
456 Bool oldR = old_seg->hasR;
457 Bool oldW = old_seg->hasW;
458 Bool oldX = old_seg->hasX;
459 /* assert new area does not overlap old */
460 vg_assert(advised+new_len-1 < old_addr
461 || advised > old_addr+old_len-1);
462 ok = VG_(am_relocate_nooverlap_client)
463 ( &d, old_addr, old_len, advised, new_len );
464 if (ok) {
465 VG_TRACK( copy_mem_remap, old_addr, advised,
466 MIN_SIZET(old_len,new_len) );
467 if (new_len > old_len)
468 VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
469 oldR, oldW, oldX, 0/*di_handle*/ );
470 VG_TRACK(die_mem_munmap, old_addr, old_len);
471 if (d) {
472 VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
473 VG_(discard_translations)( advised, new_len, "do_remap(5)" );
474 }
475 return VG_(mk_SysRes_Success)( advised );
476 }
477 }
478 goto eNOMEM;
479 }
480 /*NOTREACHED*/ vg_assert(0);
481
482 grow_in_place_or_fail:
483 {
484 Addr needA = old_addr + old_len;
485 SizeT needL = new_len - old_len;
486
487 vg_assert(needA > 0);
488
489 advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
490 if (ok) {
491 /* Fixes bug #129866. */
492 ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
493 }
494 if (!ok || advised != needA)
495 goto eNOMEM;
496 const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
497 if (!new_seg)
498 goto eNOMEM;
499 VG_TRACK( new_mem_mmap, needA, needL,
500 new_seg->hasR, new_seg->hasW, new_seg->hasX,
501 0/*di_handle*/ );
502
503 return VG_(mk_SysRes_Success)( old_addr );
504 }
505 /*NOTREACHED*/ vg_assert(0);
506
507 shrink_in_place:
508 {
509 SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
510 if (sr_isError(sres))
511 return sres;
512 VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
513 if (d)
514 VG_(discard_translations)( old_addr+new_len, old_len-new_len,
515 "do_remap(7)" );
516 return VG_(mk_SysRes_Success)( old_addr );
517 }
518 /*NOTREACHED*/ vg_assert(0);
519
520 same_in_place:
521 return VG_(mk_SysRes_Success)( old_addr );
522 /*NOTREACHED*/ vg_assert(0);
523
524 eINVAL:
525 return VG_(mk_SysRes_Error)( VKI_EINVAL );
526 eNOMEM:
527 return VG_(mk_SysRes_Error)( VKI_ENOMEM );
528
529 # undef MIN_SIZET
530 }
531 #endif /* HAVE_MREMAP */
532
533
534 /* ---------------------------------------------------------------------
535 File-descriptor tracking
536 ------------------------------------------------------------------ */
537
538 /* One of these is allocated for each open file descriptor. */
539 typedef struct OpenFd
540 {
541 Int fd; /* The file descriptor */
542 HChar *pathname; /* NULL if not a regular file or unknown */
543 HChar *description; /* Description saved before close */
544 ExeContext *where; /* NULL if inherited from parent */
545 ExeContext *where_closed; /* record the last close of fd */
546 Bool fd_closed;
547 struct OpenFd *next, *prev;
548 } OpenFd;
549
550 /* List of allocated file descriptors. */
551 static OpenFd *allocated_fds = NULL;
552
553 /* Count of open file descriptors. */
554 static Int fd_count = 0;
555
556 /* A close_range caller might want to close a very wide range of file
557 descriptors, up to ~0U. We want to avoid iterating through such a range
558 in a normal close_range, so only go up to the highest open file descriptor.
559 Also, unlike record_fd_close, we assume the user might deliberately double
560 close file descriptors in the range, so don't warn about double close here. */
561 void ML_(record_fd_close_range)(ThreadId tid, Int from_fd)
562 {
563 OpenFd *i = allocated_fds;
564
565 if (from_fd >= VG_(fd_hard_limit))
566 return; /* Valgrind internal */
567
568 while(i) {
569 // Assume the user doesn't want a warning if this came from
570 // close_range. Just record the file descriptors not yet closed here.
571 if (i->fd >= from_fd && !i->fd_closed
572 && i->fd != VG_(log_output_sink).fd
573 && i->fd != VG_(xml_output_sink).fd) {
574 i->fd_closed = True;
575 i->where_closed = ((tid == -1)
576 ? NULL
577 : VG_(record_ExeContext)(tid,
578 0/*first_ip_delta*/));
579 fd_count--;
580 }
581 i = i->next;
582 }
583 }
584
585 struct BadCloseExtra {
586 Int fd; /* The file descriptor */
587 HChar *pathname; /* NULL if not a regular file or unknown */
588 HChar *description; /* Description of the file descriptor
589 might include the pathname */
590 ExeContext *where_closed; /* record the last close of fd */
591 ExeContext *where_opened; /* record where the fd was opened */
592 };
593
594 struct NotClosedExtra {
595 Int fd;
596 HChar *pathname;
597 HChar *description;
598 };
599
600 /* Note the fact that a file descriptor was just closed. */
601 void ML_(record_fd_close)(ThreadId tid, Int fd)
602 {
603 OpenFd *i = allocated_fds;
604
605 if (fd >= VG_(fd_hard_limit))
606 return; /* Valgrind internal */
607
608 while(i) {
609 if (i->fd == fd) {
610 if (i->fd_closed) {
611 struct BadCloseExtra bce;
612 bce.fd = i->fd;
613 bce.pathname = i->pathname;
614 bce.description = i->description;
615 bce.where_opened = i->where;
616 bce.where_closed = i->where_closed;
617 VG_(maybe_record_error)(tid, FdBadClose, 0,
618 "file descriptor already closed", &bce);
619 } else {
620 i->fd_closed = True;
621 i->where_closed = ((tid == -1)
622 ? NULL
623 : VG_(record_ExeContext)(tid,
624 0/*first_ip_delta*/));
625 /* Record path/socket name, etc. In case we want to print it later,
626 for example for double close. Note that record_fd_close is
627 actually called from the PRE syscall handler, so the file
628 description is about to be closed, but hasn't yet at this
629 point. */
630 if (!i->pathname) {
631 Int val;
632 Int len = sizeof(val);
633 if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE,
634 &val, &len) == -1) {
635 HChar *pathname = VG_(malloc)("vg.record_fd_close.fd", 30);
636 VG_(snprintf)(pathname, 30, "file descriptor %d", i->fd);
637 i->description = pathname;
638 } else {
639 HChar *name = VG_(malloc)("vg.record_fd_close.sock", 256);
640 i->description = getsockdetails(i->fd, 256, name);
641 }
642 } else {
643 i->description = VG_(strdup)("vg.record_fd_close.path",
644 i->pathname);
645 }
646 fd_count--;
647 }
648 break;
649 }
650 i = i->next;
651 }
652 }
653
654 /* Note the fact that a file descriptor was just opened. If the
655 tid is -1, this indicates an inherited fd. If the pathname is NULL,
656 this either indicates a non-standard file (i.e. a pipe or socket or
657 some such thing) or that we don't know the filename. If the fd is
658 already open, then we're probably doing a dup2() to an existing fd,
659 so just overwrite the existing one. */
660 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd,
661 const HChar *pathname)
662 {
663 OpenFd *i;
664
665 if (fd >= VG_(fd_hard_limit))
666 return; /* Valgrind internal */
667
668 /* Check to see if this fd is already open (or closed); we will just
669 override it. */
670 i = allocated_fds;
671 while (i) {
672 if (i->fd == fd) {
673 if (i->pathname) {
674 VG_(free)(i->pathname);
675 i->pathname = NULL;
676 }
677 if (i->description) {
678 VG_(free)(i->description);
679 i->description = NULL;
680 }
681 if (i->fd_closed) /* now we will open it. */
682 fd_count++;
683 break;
684 }
685 i = i->next;
686 }
687
688 /* Not already one: allocate an OpenFd */
689 if (i == NULL) {
690 i = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));
691
692 i->prev = NULL;
693 i->next = allocated_fds;
694 if(allocated_fds) allocated_fds->prev = i;
695 allocated_fds = i;
696 fd_count++;
697 }
698
699 i->fd = fd;
700 i->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
701 i->description = NULL; /* Only set on close. */
702 i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
703 i->fd_closed = False;
704 i->where_closed = NULL;
705 }
706
707 // Record opening of an fd, and find its name.
708 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
709 {
710 const HChar* buf;
711 const HChar* name;
712 if (VG_(resolve_filename)(fd, &buf))
713 name = buf;
714 else
715 name = NULL;
716
717 ML_(record_fd_open_with_given_name)(tid, fd, name);
718 }
719
720 // Record opening of a nameless fd.
721 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
722 {
723 ML_(record_fd_open_with_given_name)(tid, fd, NULL);
724 }
725
726 // Return True if a given file descriptor is currently recorded as open.
727 Bool ML_(fd_recorded)(Int fd)
728 {
729 OpenFd *i = allocated_fds;
730 while (i) {
731 if (i->fd == fd) {
732 if (i->fd_closed)
733 return False;
734 else
735 return True;
736 }
737 i = i->next;
738 }
739 return False;
740 }
741
742 /* Returned string must not be modified nor free'd. */
743 const HChar *ML_(find_fd_recorded_by_fd)(Int fd)
744 {
745 OpenFd *i = allocated_fds;
746
747 while (i) {
748 if (i->fd == fd) {
749 if (i->fd_closed)
750 return NULL;
751 else
752 return i->pathname;
753 }
754 i = i->next;
755 }
756
757 return NULL;
758 }
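/* An illustrative sketch (not part of Valgrind, compiled out): how a
   syscall wrapper could combine the two query helpers above -- first ask
   whether the fd is currently tracked as open, then fetch whatever path
   was recorded for it (which may legitimately be NULL for pipes, sockets
   and the like).  The helper name is hypothetical. */
#if 0
static const HChar* example_lookup_fd ( Int fd )
{
   if (!ML_(fd_recorded)(fd))
      return NULL;                          /* unknown, or already closed */
   return ML_(find_fd_recorded_by_fd)(fd);  /* may be NULL: no pathname   */
}
#endif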
759
760 static
761 HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
762 {
763 if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
764 VG_(sprintf)(name, "<unknown>");
765 } else {
766 VG_(sprintf)(name, "%s", sa->sun_path);
767 }
768
769 return name;
770 }
771
772 static
773 HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
774 {
775 if (sa == NULL || len == 0) {
776 VG_(sprintf)(name, "<unknown>");
777 } else if (sa->sin_port == 0) {
778 VG_(sprintf)(name, "<unbound>");
779 } else {
780 UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
781 VG_(sprintf)(name, "%u.%u.%u.%u:%u",
782 (addr>>24) & 0xFF, (addr>>16) & 0xFF,
783 (addr>>8) & 0xFF, addr & 0xFF,
784 VG_(ntohs)(sa->sin_port));
785 }
786
787 return name;
788 }
789
790 static
791 void inet6_format(HChar *s, const UChar ip[16])
792 {
793 static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
794
795 if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
796 const struct vki_in_addr *sin_addr =
797 (const struct vki_in_addr *)(ip + 12);
798 UInt addr = VG_(ntohl)(sin_addr->s_addr);
799
800 VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
801 (addr>>24) & 0xFF, (addr>>16) & 0xFF,
802 (addr>>8) & 0xFF, addr & 0xFF);
803 } else {
804 Bool compressing = False;
805 Bool compressed = False;
806 Int len = 0;
807 Int i;
808
809 for (i = 0; i < 16; i += 2) {
810 UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
811 if (word == 0 && !compressed) {
812 compressing = True;
813 } else {
814 if (compressing) {
815 compressing = False;
816 compressed = True;
817 s[len++] = ':';
818 }
819 if (i > 0) {
820 s[len++] = ':';
821 }
822 len += VG_(sprintf)(s + len, "%x", word);
823 }
824 }
825
826 if (compressing) {
827 s[len++] = ':';
828 s[len++] = ':';
829 }
830
831 s[len++] = 0;
832 }
833
834 return;
835 }
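/* Illustrative examples (not part of Valgrind, compiled out) of what the
   formatter above produces; the byte patterns and expected strings follow
   the usual IPv6 textual conventions.  The function name is hypothetical. */
#if 0
static void inet6_format_examples ( void )
{
   /* A run of zero words is collapsed to "::". */
   const UChar loopback[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
   /* The V4-mapped prefix switches to dotted-quad output. */
   const UChar v4mapped[16] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff,192,0,2,1};
   HChar s[100];
   inet6_format(s, loopback);   /* s == "::1"              */
   inet6_format(s, v4mapped);   /* s == "::ffff:192.0.2.1" */
}
#endif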
836
837 static
838 HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
839 {
840 if (sa == NULL || len == 0) {
841 VG_(sprintf)(name, "<unknown>");
842 } else if (sa->sin6_port == 0) {
843 VG_(sprintf)(name, "<unbound>");
844 } else {
845 HChar addr[100]; // large enough
846 inet6_format(addr, (void *)&(sa->sin6_addr));
847 VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
848 }
849
850 return name;
851 }
852
853 /*
854 * Try to get some details about a socket.
855 * Returns the given BUF, writing at most LEN bytes into it.
856 */
857 static
858 HChar *getsockdetails(Int fd, UInt len, HChar *buf)
859 {
860 union u {
861 struct vki_sockaddr a;
862 struct vki_sockaddr_in in;
863 struct vki_sockaddr_in6 in6;
864 struct vki_sockaddr_un un;
865 } laddr;
866 Int llen;
867
868 llen = sizeof(laddr);
869 VG_(memset)(&laddr, 0, llen);
870
871 if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
872 switch(laddr.a.sa_family) {
873 case VKI_AF_INET: {
874 HChar lname[32]; // large enough
875 HChar pname[32]; // large enough
876 struct vki_sockaddr_in paddr;
877 Int plen = sizeof(struct vki_sockaddr_in);
878
879 if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
880 VG_(snprintf)(buf, len, "AF_INET socket %d: %s <-> %s", fd,
881 inet_to_name(&(laddr.in), llen, lname),
882 inet_to_name(&paddr, plen, pname));
883 return buf;
884 } else {
885 VG_(snprintf)(buf, len, "AF_INET socket %d: %s <-> <unbound>",
886 fd, inet_to_name(&(laddr.in), llen, lname));
887 return buf;
888 }
889 }
890 case VKI_AF_INET6: {
891 HChar lname[128]; // large enough
892 HChar pname[128]; // large enough
893 struct vki_sockaddr_in6 paddr;
894 Int plen = sizeof(struct vki_sockaddr_in6);
895
896 if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
897 VG_(snprintf)(buf, len, "AF_INET6 socket %d: %s <-> %s", fd,
898 inet6_to_name(&(laddr.in6), llen, lname),
899 inet6_to_name(&paddr, plen, pname));
900 return buf;
901 } else {
902 VG_(snprintf)(buf, len, "AF_INET6 socket %d: %s <-> <unbound>",
903 fd, inet6_to_name(&(laddr.in6), llen, lname));
904 return buf;
905 }
906 }
907 case VKI_AF_UNIX: {
908 static char lname[256];
909 VG_(snprintf)(buf, len, "AF_UNIX socket %d: %s", fd,
910 unix_to_name(&(laddr.un), llen, lname));
911 return buf;
912 }
913 default:
914 VG_(snprintf)(buf, len, "pf-%d socket %d",
915 laddr.a.sa_family, fd);
916 return buf;
917 }
918 }
919
920 VG_(snprintf)(buf, len, "socket %d", fd);
921 return buf;
922 }
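/* An illustrative sketch (not part of Valgrind, compiled out): a caller
   passes in its own buffer and gets back a one-line description such as
   "AF_INET socket 5: 127.0.0.1:34567 <-> 192.0.2.10:80".  The printing
   helper here is hypothetical. */
#if 0
static void example_print_sock ( Int fd )
{
   HChar buf[256];
   VG_(printf)("%s\n", getsockdetails(fd, sizeof(buf), buf));
}
#endif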
923
924
925 /* Dump out a summary, and a more detailed list, of open file descriptors. */
926 void VG_(show_open_fds) (const HChar* when)
927 {
928 OpenFd *i;
929 int non_std = 0;
930
931 for (i = allocated_fds; i; i = i->next) {
932 if (i->fd > 2 && i->fd_closed != True)
933 non_std++;
934 }
935
936 /* If we are running quietly and either there are no open file descriptors
937 or we are not tracking all fds, then don't report anything. */
938 if ((fd_count == 0
939 || ((non_std == 0) && (VG_(clo_track_fds) < 2)))
940 && (VG_(clo_verbosity) == 0))
941 return;
942
943 if (!VG_(clo_xml)) {
944 VG_(umsg)("FILE DESCRIPTORS: %d open (%d std) %s.\n",
945 fd_count, fd_count - non_std, when);
946 }
947
948 for (i = allocated_fds; i; i = i->next) {
949 if (i->fd_closed)
950 continue;
951
952 if (i->fd <= 2 && VG_(clo_track_fds) < 2)
953 continue;
954
955 struct NotClosedExtra nce;
956 /* The file descriptor was not yet closed, so the description field was
957 also not yet set. Set it now as if the file descriptor was closed at
958 this point. */
959 i->description = VG_(malloc)("vg.notclosedextra.descriptor", 256);
960 if (i->pathname) {
961 VG_(snprintf) (i->description, 256, "file descriptor %d: %s",
962 i->fd, i->pathname);
963 } else {
964 Int val;
965 Int len = sizeof(val);
966
967 if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
968 == -1) {
969 /* Don't want the : at the end in xml */
970 const HChar *colon = VG_(clo_xml) ? "" : ":";
971 VG_(sprintf)(i->description, "file descriptor %d%s", i->fd, colon);
972 } else {
973 getsockdetails(i->fd, 256, i->description);
974 }
975 }
976
977 nce.fd = i->fd;
978 nce.pathname = i->pathname;
979 nce.description = i->description;
980 VG_(unique_error) (1 /* Fake ThreadId */,
981 FdNotClosed,
982 0, /* Addr */
983 "Still Open file descriptor",
984 &nce, /* extra */
985 i->where,
986 True, /* print_error */
987 False, /* allow_GDB_attach */
988 True /* count_error */);
989
990 }
991
992 if (!VG_(clo_xml))
993 VG_(message)(Vg_UserMsg, "\n");
994 }
995
996 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
997 have /proc support compiled in, or a non-Linux kernel), then we need to
998 find out what file descriptors we inherited from our parent process the
999 hard way - by checking each fd in turn. */
1000 static
1001 void init_preopened_fds_without_proc_self_fd(void)
1002 {
1003 struct vki_rlimit lim;
1004 UInt count;
1005 Int i;
1006
1007 if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
1008 /* Hmm. getrlimit() failed. Now we're screwed, so just choose
1009 an arbitrarily high number. 1024 happens to be the limit in
1010 the 2.4 Linux kernels. */
1011 count = 1024;
1012 } else {
1013 count = lim.rlim_cur;
1014 }
1015
1016 for (i = 0; i < count; i++)
1017 if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
1018 ML_(record_fd_open_named)(-1, i);
1019 }
1020
1021 /* Initialize the list of open file descriptors with the file descriptors
1022 we inherited from our parent process. */
1023
1024 void VG_(init_preopened_fds)(void)
1025 {
1026 // DDD: should probably use HAVE_PROC here or similar, instead.
1027 #if defined(VGO_linux)
1028 Int ret;
1029 struct vki_dirent64 d;
1030 SysRes f;
1031
1032 f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
1033 if (sr_isError(f)) {
1034 init_preopened_fds_without_proc_self_fd();
1035 return;
1036 }
1037
1038 while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
1039 if (ret == -1)
1040 goto out;
1041
1042 if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
1043 HChar* s;
1044 Int fno = VG_(strtoll10)(d.d_name, &s);
1045 if (*s == '\0') {
1046 if (fno != sr_Res(f))
1047 if (VG_(clo_track_fds))
1048 ML_(record_fd_open_named)(-1, fno);
1049 } else {
1050 VG_(message)(Vg_DebugMsg,
1051 "Warning: invalid file name in /proc/self/fd: %s\n",
1052 d.d_name);
1053 }
1054 }
1055
1056 VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
1057 }
1058
1059 out:
1060 VG_(close)(sr_Res(f));
1061
1062 #elif defined(VGO_darwin) || defined(VGO_freebsd)
1063 init_preopened_fds_without_proc_self_fd();
1064
1065 #elif defined(VGO_solaris)
1066 Int ret;
1067 Char buf[VKI_MAXGETDENTS_SIZE];
1068 SysRes f;
1069
1070 f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
1071 if (sr_isError(f)) {
1072 init_preopened_fds_without_proc_self_fd();
1073 return;
1074 }
1075
1076 while ((ret = VG_(getdents64)(sr_Res(f), (struct vki_dirent64 *) buf,
1077 sizeof(buf))) > 0) {
1078 Int i = 0;
1079 while (i < ret) {
1080 /* Proceed one entry. */
1081 struct vki_dirent64 *d = (struct vki_dirent64 *) (buf + i);
1082 if (VG_(strcmp)(d->d_name, ".") && VG_(strcmp)(d->d_name, "..")) {
1083 HChar *s;
1084 Int fno = VG_(strtoll10)(d->d_name, &s);
1085 if (*s == '\0') {
1086 if (fno != sr_Res(f))
1087 if (VG_(clo_track_fds))
1088 ML_(record_fd_open_named)(-1, fno);
1089 } else {
1090 VG_(message)(Vg_DebugMsg,
1091 "Warning: invalid file name in /proc/self/fd: %s\n",
1092 d->d_name);
1093 }
1094 }
1095
1096 /* Move on the next entry. */
1097 i += d->d_reclen;
1098 }
1099 }
1100
1101 VG_(close)(sr_Res(f));
1102
1103 #else
1104 # error Unknown OS
1105 #endif
1106 }
1107
1108 Bool fd_eq_Error (VgRes res, const Error *e1, const Error *e2)
1109 {
1110 // XXX should we compare the fds?
1111 return False;
1112 }
1113
1114 void fd_before_pp_Error (const Error *err)
1115 {
1116 // Nothing to do here
1117 }
1118
1119 void fd_pp_Error (const Error *err)
1120 {
1121 const Bool xml = VG_(clo_xml);
1122 const HChar* whatpre = VG_(clo_xml) ? " <what>" : "";
1123 const HChar* whatpost = VG_(clo_xml) ? "</what>" : "";
1124 const HChar* auxpre = VG_(clo_xml) ? " <auxwhat>" : " ";
1125 const HChar* auxpost = VG_(clo_xml) ? "</auxwhat>" : "";
1126 ExeContext *where = VG_(get_error_where)(err);
1127 if (VG_(get_error_kind)(err) == FdBadClose) {
1128 if (xml) VG_(emit)(" <kind>FdBadClose</kind>\n");
1129 struct BadCloseExtra *bce = (struct BadCloseExtra *)
1130 VG_(get_error_extra)(err);
1131 if (xml) {
1132 VG_(emit)(" <fd>%d</fd>\n", bce->fd);
1133 if (bce->pathname)
1134 VG_(emit)(" <path>%s</path>\n", bce->pathname);
1135 }
1136 VG_(emit)("%sFile descriptor %d: %s is already closed%s\n",
1137 whatpre, bce->fd, bce->description, whatpost);
1138 VG_(pp_ExeContext)( VG_(get_error_where)(err) );
1139 VG_(emit)("%sPreviously closed%s\n", auxpre, auxpost);
1140 VG_(pp_ExeContext)(bce->where_closed);
1141 VG_(emit)("%sOriginally opened%s\n", auxpre, auxpost);
1142 VG_(pp_ExeContext)(bce->where_opened);
1143 } else if (VG_(get_error_kind)(err) == FdNotClosed) {
1144 if (xml) VG_(emit)(" <kind>FdNotClosed</kind>\n");
1145 struct NotClosedExtra *nce = (struct NotClosedExtra *)
1146 VG_(get_error_extra)(err);
1147 if (xml) {
1148 VG_(emit)(" <fd>%d</fd>\n", nce->fd);
1149 if (nce->pathname)
1150 VG_(emit)(" <path>%s</path>\n", nce->pathname);
1151 }
1152 VG_(emit)("%sOpen %s%s\n", whatpre, nce->description, whatpost);
1153 if (where != NULL) {
1154 VG_(pp_ExeContext)(where);
1155 if (!xml) VG_(message)(Vg_UserMsg, "\n");
1156 } else if (!xml) {
1157 VG_(message)(Vg_UserMsg, " <inherited from parent>\n");
1158 VG_(message)(Vg_UserMsg, "\n");
1159 }
1160 } else {
1161 vg_assert2 (False, "Unknown error kind: %d",
1162 VG_(get_error_kind)(err));
1163 }
1164 }
1165
1166 /* Called to see if there is any extra state to be saved with this
1167 error. Must return the size of the extra struct. */
1168 UInt fd_update_extra (const Error *err)
1169 {
1170 if (VG_(get_error_kind)(err) == FdBadClose)
1171 return sizeof (struct BadCloseExtra);
1172 else if (VG_(get_error_kind)(err) == FdNotClosed)
1173 return sizeof (struct NotClosedExtra);
1174 else {
1175 vg_assert2 (False, "Unknown error kind: %d",
1176 VG_(get_error_kind)(err));
1177 }
1178 }
1179
1180 static
1181 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
1182 const HChar *msg, Addr base, SizeT size )
1183 {
1184 HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
1185 VG_(sprintf)(outmsg, "sendmsg%s", msg);
1186 PRE_MEM_READ( outmsg, base, size );
1187 }
1188
1189 static
1190 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
1191 const HChar *msg, Addr base, SizeT size )
1192 {
1193 HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
1194 VG_(sprintf)(outmsg, "recvmsg%s", msg);
1195 if ( read )
1196 PRE_MEM_READ( outmsg, base, size );
1197 else
1198 PRE_MEM_WRITE( outmsg, base, size );
1199 }
1200
1201 static
1202 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
1203 const HChar *fieldName, Addr base, SizeT size )
1204 {
1205 if ( !read )
1206 POST_MEM_WRITE( base, size );
1207 }
1208
1209 static
1210 void msghdr_foreachfield (
1211 ThreadId tid,
1212 const HChar *name,
1213 struct vki_msghdr *msg,
1214 UInt length,
1215 void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
1216 Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
1217 )
1218 {
1219 HChar fieldName[VG_(strlen)(name) + 32]; // large enough.
1220 Addr a;
1221 SizeT s;
1222
1223 if ( !msg )
1224 return;
1225
1226 VG_(sprintf) ( fieldName, "(%s)", name );
1227
1228 /* FIELDPAIR helps the compiler do one call to foreach_func
1229 for consecutive (no holes) fields. */
1230 #define FIELDPAIR(f1,f2) \
1231 if (offsetof(struct vki_msghdr, f1) + sizeof(msg->f1) \
1232 == offsetof(struct vki_msghdr, f2)) \
1233 s += sizeof(msg->f2); \
1234 else { \
1235 foreach_func (tid, True, fieldName, a, s); \
1236 a = (Addr)&msg->f2; \
1237 s = sizeof(msg->f2); \
1238 }
1239
1240 a = (Addr)&msg->msg_name;
1241 s = sizeof(msg->msg_name);
1242 FIELDPAIR(msg_name, msg_namelen);
1243 FIELDPAIR(msg_namelen, msg_iov);
1244 FIELDPAIR(msg_iov, msg_iovlen);
1245 FIELDPAIR(msg_iovlen, msg_control);
1246 FIELDPAIR(msg_control, msg_controllen);
1247 foreach_func ( tid, True, fieldName, a, s);
1248 #undef FIELDPAIR
1249
1250 /* msg_flags is completely ignored for sendmsg; recvmsg doesn't read
1251 the field, but does write to it. */
1252 if ( rekv )
1253 foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
1254
1255 if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
1256 && msg->msg_name ) {
1257 VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
1258 foreach_func ( tid, False, fieldName,
1259 (Addr)msg->msg_name, msg->msg_namelen );
1260 }
1261
1262 if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
1263 && msg->msg_iov ) {
1264 struct vki_iovec *iov = msg->msg_iov;
1265 UInt i;
1266
1267 if (ML_(safe_to_deref)(&msg->msg_iovlen, sizeof (UInt))) {
1268 VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
1269 foreach_func ( tid, True, fieldName, (Addr)iov,
1270 msg->msg_iovlen * sizeof( struct vki_iovec ) );
1271
1272 for ( i = 0; i < msg->msg_iovlen && length > 0; ++i, ++iov ) {
1273 if (ML_(safe_to_deref)(&iov->iov_len, sizeof (UInt))) {
1274 UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
1275 VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
1276 foreach_func ( tid, False, fieldName,
1277 (Addr)iov->iov_base, iov_len );
1278 length = length - iov_len;
1279 }
1280 }
1281 }
1282 }
1283
1284 if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
1285 && msg->msg_control ) {
1286 VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
1287 foreach_func ( tid, False, fieldName,
1288 (Addr)msg->msg_control, msg->msg_controllen );
1289 }
1290
1291 }
1292
1293 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
1294 {
1295 struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
1296
1297 while (cm) {
1298 if (cm->cmsg_level == VKI_SOL_SOCKET
1299 && cm->cmsg_type == VKI_SCM_RIGHTS ) {
1300 Int *fds = (Int *) VKI_CMSG_DATA(cm);
1301 Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
1302 / sizeof(int);
1303 Int i;
1304
1305 for (i = 0; i < fdc; i++)
1306 if(VG_(clo_track_fds))
1307 // XXX: must we check the range on these fds with
1308 // ML_(fd_allowed)()?
1309 ML_(record_fd_open_named)(tid, fds[i]);
1310 }
1311
1312 cm = VKI_CMSG_NXTHDR(msg, cm);
1313 }
1314 }
1315
1316 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
1317 void ML_(pre_mem_read_sockaddr) ( ThreadId tid,
1318 const HChar *description,
1319 struct vki_sockaddr *sa, UInt salen )
1320 {
1321 HChar outmsg[VG_(strlen)( description ) + 30]; // large enough
1322 struct vki_sockaddr_un* saun = (struct vki_sockaddr_un *)sa;
1323 struct vki_sockaddr_in* sin = (struct vki_sockaddr_in *)sa;
1324 struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
1325 # ifdef VKI_AF_BLUETOOTH
1326 struct vki_sockaddr_rc* rc = (struct vki_sockaddr_rc *)sa;
1327 # endif
1328 # ifdef VKI_AF_NETLINK
1329 struct vki_sockaddr_nl* nl = (struct vki_sockaddr_nl *)sa;
1330 # endif
1331
1332 /* NULL/zero-length sockaddrs are legal */
1333 if ( sa == NULL || salen == 0 ) return;
1334
1335 VG_(sprintf) ( outmsg, description, "sa_family" );
1336 PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
1337 #if defined(VGO_freebsd)
1338 VG_(sprintf) ( outmsg, description, "sa_len" );
1339 PRE_MEM_READ( outmsg, (Addr) &sa->sa_len, sizeof(char));
1340 #endif
1341
1342 /* Don't do any extra checking if we cannot determine the sa_family. */
1343 if (! ML_(safe_to_deref) (&sa->sa_family, sizeof(vki_sa_family_t)))
1344 return;
1345
1346 switch (sa->sa_family) {
1347
1348 case VKI_AF_UNIX:
1349 if (ML_(safe_to_deref) (&saun->sun_path, sizeof (Addr))) {
1350 VG_(sprintf) ( outmsg, description, "sun_path" );
1351 PRE_MEM_RASCIIZ( outmsg, (Addr) saun->sun_path );
1352 // GrP fixme max of sun_len-2? what about nul char?
1353 }
1354 break;
1355
1356 case VKI_AF_INET:
1357 VG_(sprintf) ( outmsg, description, "sin_port" );
1358 PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
1359 VG_(sprintf) ( outmsg, description, "sin_addr" );
1360 PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
1361 break;
1362
1363 case VKI_AF_INET6:
1364 VG_(sprintf) ( outmsg, description, "sin6_port" );
1365 PRE_MEM_READ( outmsg,
1366 (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
1367 VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
1368 PRE_MEM_READ( outmsg,
1369 (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
1370 VG_(sprintf) ( outmsg, description, "sin6_addr" );
1371 PRE_MEM_READ( outmsg,
1372 (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
1373 VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
1374 PRE_MEM_READ( outmsg,
1375 (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
1376 break;
1377
1378 # ifdef VKI_AF_BLUETOOTH
1379 case VKI_AF_BLUETOOTH:
1380 VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
1381 PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
1382 VG_(sprintf) ( outmsg, description, "rc_channel" );
1383 PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
1384 break;
1385 # endif
1386
1387 # ifdef VKI_AF_NETLINK
1388 case VKI_AF_NETLINK:
1389 VG_(sprintf)(outmsg, description, "nl_pid");
1390 PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
1391 VG_(sprintf)(outmsg, description, "nl_groups");
1392 PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
1393 break;
1394 # endif
1395
1396 # ifdef VKI_AF_UNSPEC
1397 case VKI_AF_UNSPEC:
1398 break;
1399 # endif
1400
1401 default:
1402 /* No specific information about this address family.
1403 Let's just check the full data following the family.
1404 Note that this can give a false positive if this (unknown)
1405 struct sockaddr_???? has padding bytes between its elements. */
1406 VG_(sprintf) ( outmsg, description, "sa_data" );
1407 PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
1408 salen - sizeof(sa->sa_family));
1409 break;
1410 }
1411 }
1412
1413 /* Dereference a pointer to a UInt. */
1414 static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
1415 {
1416 UInt* a_p = (UInt*)a;
1417 PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
1418 if (a_p == NULL || ! ML_(safe_to_deref) (a_p, sizeof(UInt)))
1419 return 0;
1420 else
1421 return *a_p;
1422 }
1423
1424 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1425 const HChar* buf_s, const HChar* buflen_s )
1426 {
1427 if (VG_(tdict).track_pre_mem_write) {
1428 UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1429 if (buflen_in > 0) {
1430 VG_(tdict).track_pre_mem_write(
1431 Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1432 }
1433 }
1434 }
1435
1436 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1437 Addr buf_p, Addr buflen_p, const HChar* s )
1438 {
1439 if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1440 UInt buflen_out = deref_UInt( tid, buflen_p, s);
1441 if (buflen_out > 0 && buf_p != (Addr)NULL) {
1442 VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1443 }
1444 }
1445 }
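/* An illustrative sketch (not part of Valgrind, compiled out) of the
   value-result pattern the two checks above model, as used by
   getsockname()-style calls: before the syscall the client must have
   initialised *namelen and made the buffer addressable; afterwards the
   kernel has rewritten *namelen and filled that many bytes of the buffer.
   The wrapper name is hypothetical. */
#if 0
static void example_buf_and_len ( ThreadId tid, SysRes res,
                                  Addr name_p, Addr namelen_p )
{
   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
                                "example(name)", "example(namelen_in)" );
   /* ... the syscall itself happens between PRE and POST ... */
   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
                                 "example(namelen_out)" );
}
#endif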
1446
1447 /* ---------------------------------------------------------------------
1448 Data seg end, for brk()
1449 ------------------------------------------------------------------ */
1450
1451 /* +--------+------------+
1452 | anon | resvn |
1453 +--------+------------+
1454
1455 ^ ^ ^
1456 | | boundary is page aligned
1457 | VG_(brk_limit) -- no alignment constraint
1458 VG_(brk_base) -- page aligned -- does not move
1459
1460 Both the anon part and the reservation part are always at least
1461 one page.
1462 */
1463
1464 /* Set the new data segment end to NEWBRK. If this succeeds, return
1465 NEWBRK, else return the current data segment end. */
1466
1467 static Addr do_brk ( Addr newbrk, ThreadId tid )
1468 {
1469 NSegment const* aseg;
1470 Addr newbrkP;
1471 SizeT delta;
1472 Bool debug = False;
1473
1474 if (debug)
1475 VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
1476 VG_(brk_base), VG_(brk_limit), newbrk);
1477
1478 if (0) VG_(am_show_nsegments)(0, "in_brk");
1479
1480 if (newbrk < VG_(brk_base))
1481 /* Clearly impossible. */
1482 goto bad;
1483
1484 if (newbrk < VG_(brk_limit)) {
1485 /* shrinking the data segment. Be lazy and don't munmap the
1486 excess area. */
1487 NSegment const * seg = VG_(am_find_nsegment)(newbrk);
1488 vg_assert(seg);
1489
1490 if (seg->hasT)
1491 VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
1492 "do_brk(shrink)" );
1493 /* Since we're being lazy and not unmapping pages, we have to
1494 zero out the area, so that if the area later comes back into
1495 circulation, it will be filled with zeroes, as if it really
1496 had been unmapped and later remapped. Be a bit paranoid and
1497 try hard to ensure we're not going to segfault by doing the
1498 write - check both ends of the range are in the same segment
1499 and that segment is writable. */
1500 NSegment const * seg2;
1501
1502 seg2 = VG_(am_find_nsegment)( VG_(brk_limit) - 1 );
1503 vg_assert(seg2);
1504
1505 if (seg == seg2 && seg->hasW)
1506 VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
1507
1508 VG_(brk_limit) = newbrk;
1509 return newbrk;
1510 }
1511
1512 /* otherwise we're expanding the brk segment. */
1513 if (VG_(brk_limit) > VG_(brk_base))
1514 aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1515 else
1516 aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
1517
1518 /* These should be assured by setup_client_dataseg in m_main. */
1519 vg_assert(aseg);
1520 vg_assert(aseg->kind == SkAnonC);
1521
1522 if (newbrk <= aseg->end + 1) {
1523 /* still fits within the anon segment. */
1524 VG_(brk_limit) = newbrk;
1525 return newbrk;
1526 }
1527
1528 newbrkP = VG_PGROUNDUP(newbrk);
1529 delta = newbrkP - (aseg->end + 1);
1530 vg_assert(delta > 0);
1531 vg_assert(VG_IS_PAGE_ALIGNED(delta));
1532
1533 Bool overflow = False;
1534 if (! VG_(am_extend_into_adjacent_reservation_client)( aseg->start, delta,
1535 &overflow)) {
1536 if (overflow) {
1537 static Bool alreadyComplained = False;
1538 if (!alreadyComplained) {
1539 alreadyComplained = True;
1540 if (VG_(clo_verbosity) > 0) {
1541 VG_(umsg)("brk segment overflow in thread #%u: "
1542 "can't grow to %#lx\n",
1543 tid, newbrkP);
1544 VG_(umsg)("(see section Limitations in user manual)\n");
1545 VG_(umsg)("NOTE: further instances of this message "
1546 "will not be shown\n");
1547 }
1548 }
1549 } else {
1550 if (VG_(clo_verbosity) > 0) {
1551 VG_(umsg)("Cannot map memory to grow brk segment in thread #%u "
1552 "to %#lx\n", tid, newbrkP);
1553 VG_(umsg)("(see section Limitations in user manual)\n");
1554 }
1555 }
1556 goto bad;
1557 }
1558
1559 VG_(brk_limit) = newbrk;
1560 return newbrk;
1561
1562 bad:
1563 return VG_(brk_limit);
1564 }
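/* An illustrative sketch (not part of Valgrind, compiled out): do_brk
   follows the kernel's brk contract -- on failure it returns the old,
   unchanged break rather than an error code, so a wrapper detects failure
   simply by comparing the result against what it asked for.  Hypothetical
   caller, hypothetical grow amount. */
#if 0
static void example_grow_brk ( ThreadId tid )
{
   Addr wanted = VG_(brk_limit) + 0x10000;   /* try to grow by 64 KiB */
   Addr got    = do_brk(wanted, tid);
   if (got == wanted) {
      /* grew, possibly by extending into the adjacent reservation */
   } else {
      /* 'got' is still the old limit: the request was refused */
   }
}
#endif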
1565
1566
1567 /* ---------------------------------------------------------------------
1568 Vet file descriptors for sanity
1569 ------------------------------------------------------------------ */
1570 /*
1571 > - what does the "Bool soft" parameter mean?
1572
1573 (Tom Hughes, 3 Oct 05):
1574
1575 Whether or not to consider a file descriptor invalid if it is above
1576 the current soft limit.
1577
1578 Basically if we are testing whether a newly created file descriptor is
1579 valid (in a post handler) then we set soft to true, and if we are
1580 testing whether a file descriptor that is about to be used (in a pre
1581 handler) is valid [viz, an already-existing fd] then we set it to false.
1582
1583 The point is that if the (virtual) soft limit is lowered then any
1584 existing descriptors can still be read/written/closed etc (so long as
1585 they are below the valgrind reserved descriptors) but no new
1586 descriptors can be created above the new soft limit.
1587
1588 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1589 */
1590
1591 /* Return true if we're allowed to use or create this fd */
1592 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
1593 Bool isNewFd)
1594 {
1595 Bool allowed = True;
1596
1597 /* hard limits always apply */
1598 if (fd < 0 || fd >= VG_(fd_hard_limit))
1599 allowed = False;
1600
1601 /* hijacking the output fds is never allowed */
1602 if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1603 allowed = False;
1604
1605 /* if creating a new fd (rather than using an existing one), the
1606 soft limit must also be observed */
1607 if (isNewFd && fd >= VG_(fd_soft_limit))
1608 allowed = False;
1609
1610 /* this looks like it ought to be included, but causes problems: */
1611 /*
1612 if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1613 allowed = False;
1614 */
1615 /* The difficulty is as follows: consider a program P which expects
1616 to be able to mess with (redirect) its own stderr (fd 2).
1617 Usually to deal with P we would issue command line flags to send
1618 logging somewhere other than stderr, so as not to disrupt P.
1619 The problem is that -d unilaterally hijacks stderr with no
1620 consultation with P. And so, if this check is enabled, P will
1621 work OK normally but fail if -d is issued.
1622
1623 Basically -d is a hack and you take your chances when using it.
1624 It's very useful for low level debugging -- particularly at
1625 startup -- and having its presence change the behaviour of the
1626 client is exactly what we don't want. */
1627
1628 /* croak? */
1629 if ((!allowed) && VG_(showing_core_errors)() ) {
1630 VG_(message)(Vg_UserMsg,
1631 "Warning: invalid file descriptor %d in syscall %s()\n",
1632 fd, syscallname);
1633 if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1634 VG_(message)(Vg_UserMsg,
1635 " Use --log-fd=<number> to select an alternative log fd.\n");
1636 if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1637 VG_(message)(Vg_UserMsg,
1638 " Use --xml-fd=<number> to select an alternative XML "
1639 "output fd.\n");
1640 // DDD: consider always printing this stack trace, it's useful.
1641 // Also consider also making this a proper core error, ie.
1642 // suppressible and all that.
1643 if (VG_(clo_verbosity) > 1) {
1644 VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1645 }
1646 }
1647
1648 return allowed;
1649 }
1650
1651
1652 /* ---------------------------------------------------------------------
1653 Deal with a bunch of socket-related syscalls
1654 ------------------------------------------------------------------ */
1655
1656 /* ------ */
1657
1658 void
1659 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1660 UWord arg0, UWord arg1,
1661 UWord arg2, UWord arg3 )
1662 {
1663 /* int socketpair(int d, int type, int protocol, int sv[2]); */
1664 PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1665 arg3, 2*sizeof(int) );
1666 }
1667
1668 SysRes
1669 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1670 SysRes res,
1671 UWord arg0, UWord arg1,
1672 UWord arg2, UWord arg3 )
1673 {
1674 SysRes r = res;
1675 Int fd1 = ((Int*)arg3)[0];
1676 Int fd2 = ((Int*)arg3)[1];
1677 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1678 POST_MEM_WRITE( arg3, 2*sizeof(int) );
1679 if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1680 !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1681 VG_(close)(fd1);
1682 VG_(close)(fd2);
1683 r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1684 } else {
1685 POST_MEM_WRITE( arg3, 2*sizeof(int) );
1686 if (VG_(clo_track_fds)) {
1687 ML_(record_fd_open_nameless)(tid, fd1);
1688 ML_(record_fd_open_nameless)(tid, fd2);
1689 }
1690 }
1691 return r;
1692 }
1693
1694 /* ------ */
1695
1696 SysRes
1697 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1698 {
1699 SysRes r = res;
1700 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1701 if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1702 VG_(close)(sr_Res(res));
1703 r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1704 } else {
1705 if (VG_(clo_track_fds))
1706 ML_(record_fd_open_nameless)(tid, sr_Res(res));
1707 }
1708 return r;
1709 }
1710
1711 /* ------ */
1712
1713 void
1714 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1715 UWord arg0, UWord arg1, UWord arg2 )
1716 {
1717 /* int bind(int sockfd, struct sockaddr *my_addr,
1718 int addrlen); */
1719 ML_(pre_mem_read_sockaddr) (
1720 tid, "socketcall.bind(my_addr.%s)",
1721 (struct vki_sockaddr *) arg1, arg2
1722 );
1723 }
1724
1725 /* ------ */
1726
1727 void
1728 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1729 UWord arg0, UWord arg1, UWord arg2 )
1730 {
1731 /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1732 Addr addr_p = arg1;
1733 Addr addrlen_p = arg2;
1734 if (addr_p != (Addr)NULL)
1735 ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1736 "socketcall.accept(addr)",
1737 "socketcall.accept(addrlen_in)" );
1738 }
1739
1740 SysRes
1741 ML_(generic_POST_sys_accept) ( ThreadId tid,
1742 SysRes res,
1743 UWord arg0, UWord arg1, UWord arg2 )
1744 {
1745 SysRes r = res;
1746 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1747 if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1748 VG_(close)(sr_Res(res));
1749 r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1750 } else {
1751 Addr addr_p = arg1;
1752 Addr addrlen_p = arg2;
1753 if (addr_p != (Addr)NULL)
1754 ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1755 "socketcall.accept(addrlen_out)" );
1756 if (VG_(clo_track_fds))
1757 ML_(record_fd_open_nameless)(tid, sr_Res(res));
1758 }
1759 return r;
1760 }
1761
1762 /* ------ */
1763
1764 void
1765 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1766 UWord arg0, UWord arg1, UWord arg2,
1767 UWord arg3, UWord arg4, UWord arg5 )
1768 {
1769 /* int sendto(int s, const void *msg, int len,
1770 unsigned int flags,
1771 const struct sockaddr *to, int tolen); */
1772 PRE_MEM_READ( "socketcall.sendto(msg)",
1773 arg1, /* msg */
1774 arg2 /* len */ );
1775 ML_(pre_mem_read_sockaddr) (
1776 tid, "socketcall.sendto(to.%s)",
1777 (struct vki_sockaddr *) arg4, arg5
1778 );
1779 }
1780
1781 /* ------ */
1782
1783 void
1784 ML_(generic_PRE_sys_send) ( ThreadId tid,
1785 UWord arg0, UWord arg1, UWord arg2 )
1786 {
1787 /* int send(int s, const void *msg, size_t len, int flags); */
1788 PRE_MEM_READ( "socketcall.send(msg)",
1789 arg1, /* msg */
1790 arg2 /* len */ );
1791
1792 }
1793
1794 /* ------ */
1795
1796 void
1797 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1798 UWord arg0, UWord arg1, UWord arg2,
1799 UWord arg3, UWord arg4, UWord arg5 )
1800 {
1801 /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1802 struct sockaddr *from, int *fromlen); */
1803 Addr buf_p = arg1;
1804 Int len = arg2;
1805 Addr from_p = arg4;
1806 Addr fromlen_p = arg5;
1807 PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1808 if (from_p != (Addr)NULL)
1809 ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1810 "socketcall.recvfrom(from)",
1811 "socketcall.recvfrom(fromlen_in)" );
1812 }
1813
1814 void
1815 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1816 SysRes res,
1817 UWord arg0, UWord arg1, UWord arg2,
1818 UWord arg3, UWord arg4, UWord arg5 )
1819 {
1820 Addr buf_p = arg1;
1821 Int len = arg2;
1822 Addr from_p = arg4;
1823 Addr fromlen_p = arg5;
1824
1825 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1826 if (from_p != (Addr)NULL)
1827 ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1828 "socketcall.recvfrom(fromlen_out)" );
1829 POST_MEM_WRITE( buf_p, len );
1830 }
1831
1832 /* ------ */
1833
1834 void
1835 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1836 UWord arg0, UWord arg1, UWord arg2 )
1837 {
1838 /* int recv(int s, void *buf, int len, unsigned int flags); */
1839 /* man 2 recv says:
1840 The recv call is normally used only on a connected socket
1841 (see connect(2)) and is identical to recvfrom with a NULL
1842 from parameter.
1843 */
1844 PRE_MEM_WRITE( "socketcall.recv(buf)",
1845 arg1, /* buf */
1846 arg2 /* len */ );
1847 }
1848
1849 void
1850 ML_(generic_POST_sys_recv) ( ThreadId tid,
1851 UWord res,
1852 UWord arg0, UWord arg1, UWord arg2 )
1853 {
1854 if (arg1 != 0) {
1855 POST_MEM_WRITE( arg1, /* buf */
1856 arg2 /* len */ );
1857 }
1858 }
1859
1860 /* ------ */
1861
1862 void
1863 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1864 UWord arg0, UWord arg1, UWord arg2 )
1865 {
1866 /* int connect(int sockfd,
1867 struct sockaddr *serv_addr, int addrlen ); */
1868 ML_(pre_mem_read_sockaddr) ( tid,
1869 "socketcall.connect(serv_addr.%s)",
1870 (struct vki_sockaddr *) arg1, arg2);
1871 }
1872
1873 /* ------ */
1874
1875 void
1876 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1877 UWord arg0, UWord arg1, UWord arg2,
1878 UWord arg3, UWord arg4 )
1879 {
1880 /* int setsockopt(int s, int level, int optname,
1881 const void *optval, int optlen); */
1882 PRE_MEM_READ( "socketcall.setsockopt(optval)",
1883 arg3, /* optval */
1884 arg4 /* optlen */ );
1885 }
1886
1887 /* ------ */
1888
1889 void
1890 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1891 UWord arg0, UWord arg1, UWord arg2 )
1892 {
1893 /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1894 Addr name_p = arg1;
1895 Addr namelen_p = arg2;
1896 /* Nb: name_p cannot be NULL */
1897 ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1898 "socketcall.getsockname(name)",
1899 "socketcall.getsockname(namelen_in)" );
1900 }
1901
1902 void
1903 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1904 SysRes res,
1905 UWord arg0, UWord arg1, UWord arg2 )
1906 {
1907 Addr name_p = arg1;
1908 Addr namelen_p = arg2;
1909 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1910 ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1911 "socketcall.getsockname(namelen_out)" );
1912 }
1913
1914 /* ------ */
1915
1916 void
1917 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1918 UWord arg0, UWord arg1, UWord arg2 )
1919 {
1920 /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1921 Addr name_p = arg1;
1922 Addr namelen_p = arg2;
1923 /* Nb: name_p cannot be NULL */
1924 ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1925 "socketcall.getpeername(name)",
1926 "socketcall.getpeername(namelen_in)" );
1927 }
1928
1929 void
1930 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1931 SysRes res,
1932 UWord arg0, UWord arg1, UWord arg2 )
1933 {
1934 Addr name_p = arg1;
1935 Addr namelen_p = arg2;
1936 vg_assert(!sr_isError(res)); /* guaranteed by caller */
1937 ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1938 "socketcall.getpeername(namelen_out)" );
1939 }
1940
1941 /* ------ */
1942
1943 void
1944 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
1945 struct vki_msghdr *msg )
1946 {
1947 msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
1948 }
1949
1950 /* ------ */
1951
1952 void
1953 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
1954 struct vki_msghdr *msg )
1955 {
1956 msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
1957 }
1958
1959 void
1960 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
1961 struct vki_msghdr *msg, UInt length )
1962 {
1963 msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
1964 check_cmsg_for_fds( tid, msg );
1965 }
1966
1967
1968 /* ---------------------------------------------------------------------
1969 Deal with a bunch of IPC related syscalls
1970 ------------------------------------------------------------------ */
1971
1972 /* ------ */
1973
1974 void
1975 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1976 UWord arg0, UWord arg1, UWord arg2 )
1977 {
1978 /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1979 PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1980 }
1981
1982 /* ------ */
1983
1984 void
1985 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1986 UWord arg0, UWord arg1,
1987 UWord arg2, UWord arg3 )
1988 {
1989 /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1990 struct timespec *timeout); */
1991 PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1992 if (arg3 != 0)
1993 PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1994 }
1995
1996 /* ------ */
1997
1998 static
1999 UInt get_sem_count( Int semid )
2000 {
2001 union vki_semun arg;
2002 SysRes res;
2003
2004 # if defined(__NR_semctl)
2005 # if defined(VGO_darwin)
2006 /* Darwin has no specific 64 bit semid_ds, but has __NR_semctl. */
2007 struct vki_semid_ds buf;
2008 arg.buf = &buf;
2009 # else
2010 struct vki_semid64_ds buf;
2011 arg.buf64 = &buf;
2012 # endif
2013 res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
2014 if (sr_isError(res))
2015 return 0;
2016
2017 return buf.sem_nsems;
2018 # elif defined(__NR___semctl) /* FreeBSD */
2019 struct vki_semid_ds buf;
2020 arg.buf = &buf;
2021 res = VG_(do_syscall4)(__NR___semctl, semid, 0, VKI_IPC_STAT, (RegWord)&arg);
2022
2023 if (sr_isError(res))
2024 return 0;
2025
2026 // both clang-tidy and coverity complain about this but I think they are both wrong
2027 return buf.sem_nsems;
2028 # elif defined(__NR_semsys) /* Solaris */
2029 struct vki_semid_ds buf;
2030 arg.buf = &buf;
2031 res = VG_(do_syscall5)(__NR_semsys, VKI_SEMCTL, semid, 0, VKI_IPC_STAT,
2032 *(UWord *)&arg);
2033 if (sr_isError(res))
2034 return 0;
2035
2036 return buf.sem_nsems;
2037
2038 # else
2039 struct vki_semid_ds buf;
2040 arg.buf = &buf;
2041 res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
2042 VKI_IPC_STAT, (UWord)&arg);
2043 if (sr_isError(res))
2044 return 0;
2045
2046 return buf.sem_nsems;
2047 # endif
2048 }
2049
2050 void
2051 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
2052 UWord arg0, UWord arg1,
2053 UWord arg2, UWord arg3 )
2054 {
2055 /* int semctl(int semid, int semnum, int cmd, ...); */
2056 union vki_semun arg = *(union vki_semun *)&arg3;
2057 UInt nsems;
2058 switch (arg2 /* cmd */) {
2059 #if defined(VKI_IPC_INFO)
2060 case VKI_IPC_INFO:
2061 case VKI_SEM_INFO:
2062 #if defined(VKI_IPC_64)
2063 case VKI_IPC_INFO|VKI_IPC_64:
2064 case VKI_SEM_INFO|VKI_IPC_64:
2065 #endif
2066 #if defined(VGO_freebsd)
2067 PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
2068 (Addr)arg.buf, sizeof(struct vki_semid_ds) );
2069 #else
2070 PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
2071 (Addr)arg.buf, sizeof(struct vki_seminfo) );
2072 #endif
2073 break;
2074 #endif
2075
2076 case VKI_IPC_STAT:
2077 #if defined(VKI_SEM_STAT)
2078 case VKI_SEM_STAT:
2079 #endif
2080 PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
2081 (Addr)arg.buf, sizeof(struct vki_semid_ds) );
2082 break;
2083
2084 #if defined(VKI_IPC_64)
2085 case VKI_IPC_STAT|VKI_IPC_64:
2086 #if defined(VKI_SEM_STAT)
2087 case VKI_SEM_STAT|VKI_IPC_64:
2088 #endif
2089 #endif
2090 #if defined(VKI_IPC_STAT64)
2091 case VKI_IPC_STAT64:
2092 #endif
2093 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
2094 PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
2095 (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
2096 break;
2097 #endif
2098
2099 case VKI_IPC_SET:
2100 PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
2101 (Addr)arg.buf, sizeof(struct vki_semid_ds) );
2102 break;
2103
2104 #if defined(VKI_IPC_64)
2105 case VKI_IPC_SET|VKI_IPC_64:
2106 #endif
2107 #if defined(VKI_IPC_SET64)
2108 case VKI_IPC_SET64:
2109 #endif
2110 #if defined(VKI_IPC_64) || defined(VKI_IPC_SET64)
2111 PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
2112 (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
2113 break;
2114 #endif
2115
2116 case VKI_GETALL:
2117 #if defined(VKI_IPC_64)
2118 case VKI_GETALL|VKI_IPC_64:
2119 #endif
2120 nsems = get_sem_count( arg0 );
2121 PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
2122 (Addr)arg.array, sizeof(unsigned short) * nsems );
2123 break;
2124
2125 case VKI_SETALL:
2126 #if defined(VKI_IPC_64)
2127 case VKI_SETALL|VKI_IPC_64:
2128 #endif
2129 nsems = get_sem_count( arg0 );
2130 PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
2131 (Addr)arg.array, sizeof(unsigned short) * nsems );
2132 break;
2133 }
2134 }
2135
2136 void
2137 ML_(generic_POST_sys_semctl) ( ThreadId tid,
2138 UWord res,
2139 UWord arg0, UWord arg1,
2140 UWord arg2, UWord arg3 )
2141 {
2142 union vki_semun arg = *(union vki_semun *)&arg3;
2143 UInt nsems;
2144 switch (arg2 /* cmd */) {
2145 #if defined(VKI_IPC_INFO)
2146 case VKI_IPC_INFO:
2147 case VKI_SEM_INFO:
2148 #if defined(VKI_IPC_64)
2149 case VKI_IPC_INFO|VKI_IPC_64:
2150 case VKI_SEM_INFO|VKI_IPC_64:
2151 #endif
2152 #if defined(VGO_freebsd)
2153 POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
2154 #else
2155 POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
2156 #endif
2157 break;
2158 #endif
2159
2160 case VKI_IPC_STAT:
2161 #if defined(VKI_SEM_STAT)
2162 case VKI_SEM_STAT:
2163 #endif
2164 POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
2165 break;
2166
2167 #if defined(VKI_IPC_64)
2168 case VKI_IPC_STAT|VKI_IPC_64:
2169 case VKI_SEM_STAT|VKI_IPC_64:
2170 #endif
2171 #if defined(VKI_IPC_STAT64)
2172 case VKI_IPC_STAT64:
2173 #endif
2174 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
2175 POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
2176 break;
2177 #endif
2178
2179 case VKI_GETALL:
2180 #if defined(VKI_IPC_64)
2181 case VKI_GETALL|VKI_IPC_64:
2182 #endif
2183 nsems = get_sem_count( arg0 );
2184 POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
2185 break;
2186 }
2187 }
2188
2189 /* ------ */
2190
2191 /* ------ */
2192
2193 static
2194 SizeT get_shm_size ( Int shmid )
2195 {
2196 /*
2197 * The excluded platforms below gained direct shmctl in Linux 5.1. Keep
2198 * using ipc-multiplexed shmctl to retain compatibility with older kernel
2199 * versions.
2200 */
2201 #if defined(__NR_shmctl) && \
2202 !defined(VGP_x86_linux) && !defined(VGP_mips32_linux) && \
2203 !defined(VGP_ppc32_linux) && !defined(VGP_ppc64be_linux) && \
2204 !defined(VGP_ppc64le_linux) && !defined(VGP_s390x_linux)
2205 # ifdef VKI_IPC_64
2206 struct vki_shmid64_ds buf;
2207 /*
2208 * On Linux, the following ABIs use old shmid_ds by default with direct
2209 * shmctl and require IPC_64 for shmid64_ds (i.e. the direct syscall is
2210 * mapped to sys_old_shmctl):
2211 * alpha, arm, microblaze, mips n32/n64, xtensa
2212 * Other Linux ABIs use shmid64_ds by default and do not recognize IPC_64
2213 * with the direct shmctl syscall (but still recognize it for the
2214 * ipc-multiplexed version if that exists for the ABI).
2215 */
2216 # if defined(VGO_linux) && !defined(VGP_arm_linux) && !defined(VGP_mips64_linux)
2217 SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
2218 VKI_IPC_STAT, (UWord)&buf);
2219 # else
2220 SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
2221 VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
2222 # endif
2223 # else /* !def VKI_IPC_64 */
2224 struct vki_shmid_ds buf;
2225 SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
2226 # endif /* def VKI_IPC_64 */
2227 #elif defined(__NR_shmsys) /* Solaris */
2228 struct vki_shmid_ds buf;
2229 SysRes __res = VG_(do_syscall4)(__NR_shmsys, VKI_SHMCTL, shmid, VKI_IPC_STAT,
2230 (UWord)&buf);
2231 #else
2232 struct vki_shmid_ds buf;
2233 SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
2234 VKI_IPC_STAT, 0, (UWord)&buf);
2235 #endif
2236 if (sr_isError(__res))
2237 return 0;
2238
2239 return (SizeT) buf.shm_segsz;
2240 }
2241
2242 UWord
2243 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
2244 UWord arg0, UWord arg1, UWord arg2 )
2245 {
2246 /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
2247 SizeT segmentSize = get_shm_size ( arg0 );
2248 UWord tmp;
2249 Bool ok;
2250 if (arg1 == 0) {
2251 /* arm-linux only: work around the fact that
2252 VG_(am_get_advisory_client_simple) produces something that is
2253 VKI_PAGE_SIZE aligned, whereas what we want is something
2254 VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE. Hence
2255 increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
2256 then round the result up to the next VKI_SHMLBA boundary.
2257 See bug 222545 comment 15. So far, arm-linux is the only
2258 platform where this is known to be necessary. */
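      /* Worked example of the adjustment below (illustrative numbers only,
         assuming VKI_PAGE_SIZE is 4096 and VKI_SHMLBA is 16384): the request
         is grown by 16384 - 4096 = 12288 bytes, so whatever page-aligned
         address the advisory returns, rounding it up with
         VG_ROUNDUP(tmp, VKI_SHMLBA) moves it by at most 12288 bytes and the
         original segmentSize still fits.  E.g. an advisory of 0x5000 becomes
         an attach address of 0x8000. */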
2259 vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
2260 if (VKI_SHMLBA > VKI_PAGE_SIZE) {
2261 segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
2262 }
2263 tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
2264 if (ok) {
2265 if (VKI_SHMLBA > VKI_PAGE_SIZE) {
2266 arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
2267 } else {
2268 arg1 = tmp;
2269 }
2270 }
2271 }
2272 else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
2273 arg1 = 0;
2274 return arg1;
2275 }
2276
2277 void
2278 ML_(generic_POST_sys_shmat) ( ThreadId tid,
2279 UWord res,
2280 UWord arg0, UWord arg1, UWord arg2 )
2281 {
2282 SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
2283 if ( segmentSize > 0 ) {
2284 UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
2285 Bool d;
2286
2287 if (arg2 & VKI_SHM_RDONLY)
2288 prot &= ~VKI_PROT_WRITE;
2289 /* It isn't exactly correct to pass 0 for the fd and offset
2290 here. The kernel seems to think the corresponding section
2291 does have dev/ino numbers:
2292
2293 04e52000-04ec8000 rw-s 00000000 00:06 1966090 /SYSV00000000 (deleted)
2294
2295 However there is no obvious way to find them. In order to
2296 cope with the discrepancy, aspacem's sync checker omits the
2297 dev/ino correspondence check in cases where V does not know
2298 the dev/ino. */
2299 d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
2300
2301 /* we don't distinguish whether it's read-only or
2302 * read-write -- it doesn't matter really. */
2303 VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
2304 0/*di_handle*/ );
2305 if (d)
2306 VG_(discard_translations)( (Addr)res,
2307 (ULong)VG_PGROUNDUP(segmentSize),
2308 "ML_(generic_POST_sys_shmat)" );
2309 }
2310 }
2311
2312 /* ------ */
2313
2314 Bool
2315 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
2316 {
2317 /* int shmdt(const void *shmaddr); */
2318 return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
2319 }
2320
2321 void
2322 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
2323 {
2324 NSegment const* s = VG_(am_find_nsegment)(arg0);
2325
2326 if (s != NULL) {
2327 Addr s_start = s->start;
2328 SizeT s_len = s->end+1 - s->start;
2329 Bool d;
2330
2331 vg_assert(s->kind == SkShmC);
2332 vg_assert(s->start == arg0);
2333
2334 d = VG_(am_notify_munmap)(s_start, s_len);
2335 s = NULL; /* s is now invalid */
2336 VG_TRACK( die_mem_munmap, s_start, s_len );
2337 if (d)
2338 VG_(discard_translations)( s_start,
2339 (ULong)s_len,
2340 "ML_(generic_POST_sys_shmdt)" );
2341 }
2342 }
2343 /* ------ */
2344
2345 void
2346 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
2347 UWord arg0, UWord arg1, UWord arg2 )
2348 {
2349 /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
2350 switch (arg1 /* cmd */) {
2351 #if defined(VKI_IPC_INFO)
2352 case VKI_IPC_INFO:
2353 # if defined(VGO_freebsd)
2354 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2355 arg2, sizeof(struct vki_shmid_ds) );
2356 # else
2357 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2358 arg2, sizeof(struct vki_shminfo) );
2359 # endif
2360 break;
2361 #if defined(VKI_IPC_64)
2362 case VKI_IPC_INFO|VKI_IPC_64:
2363 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2364 arg2, sizeof(struct vki_shminfo64) );
2365 break;
2366 #endif
2367 #endif
2368
2369 #if defined(VKI_SHM_INFO)
2370 case VKI_SHM_INFO:
2371 #if defined(VKI_IPC_64)
2372 case VKI_SHM_INFO|VKI_IPC_64:
2373 #endif
2374 PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
2375 arg2, sizeof(struct vki_shm_info) );
2376 break;
2377 #endif
2378
2379 case VKI_IPC_STAT:
2380 #if defined(VKI_SHM_STAT)
2381 case VKI_SHM_STAT:
2382 #endif
2383 PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
2384 arg2, sizeof(struct vki_shmid_ds) );
2385 break;
2386
2387 #if defined(VKI_IPC_64)
2388 case VKI_IPC_STAT|VKI_IPC_64:
2389 case VKI_SHM_STAT|VKI_IPC_64:
2390 PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
2391 arg2, sizeof(struct vki_shmid64_ds) );
2392 break;
2393 #endif
2394
2395 case VKI_IPC_SET:
2396 PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2397 arg2, sizeof(struct vki_shmid_ds) );
2398 break;
2399
2400 #if defined(VKI_IPC_64)
2401 case VKI_IPC_SET|VKI_IPC_64:
2402 PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2403 arg2, sizeof(struct vki_shmid64_ds) );
2404 break;
2405 #endif
2406 }
2407 }
2408
2409 void
2410 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
2411 UWord res,
2412 UWord arg0, UWord arg1, UWord arg2 )
2413 {
2414 switch (arg1 /* cmd */) {
2415 #if defined(VKI_IPC_INFO)
2416 case VKI_IPC_INFO:
2417 # if defined(VGO_freebsd)
2418 POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2419 # else
2420 POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
2421 # endif
2422 break;
2423 #if defined(VKI_IPC_64)
2424 case VKI_IPC_INFO|VKI_IPC_64:
2425 POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
2426 break;
2427 #endif
2428 #endif
2429
2430 #if defined(VKI_SHM_INFO)
2431 case VKI_SHM_INFO:
2432 case VKI_SHM_INFO|VKI_IPC_64:
2433 POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
2434 break;
2435 #endif
2436
2437 case VKI_IPC_STAT:
2438 #if defined(VKI_SHM_STAT)
2439 case VKI_SHM_STAT:
2440 #endif
2441 POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2442 break;
2443
2444 #if defined(VKI_IPC_64)
2445 case VKI_IPC_STAT|VKI_IPC_64:
2446 case VKI_SHM_STAT|VKI_IPC_64:
2447 POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
2448 break;
2449 #endif
2450
2451
2452 }
2453 }
2454
2455 /* ---------------------------------------------------------------------
2456 Generic handler for mmap
2457 ------------------------------------------------------------------ */
2458
2459 /*
2460 * Although mmap is specified by POSIX and the arguments are generally
2461 * consistent across platforms, the precise details of the low-level
2462 * argument passing conventions differ. For example:
2463 *
2464 * - On x86-linux there is mmap (aka old_mmap) which takes the
2465 * arguments in a memory block and the offset in bytes; and
2466 * mmap2 (aka sys_mmap2) which takes the arguments in the normal
2467 * way and the offset in pages.
2468 *
2469 * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
2470 * arguments in the normal way and the offset in bytes; and
2471 * mmap2 (aka sys_mmap2) which takes the arguments in the normal
2472 * way and the offset in pages.
2473 *
2474 * - On amd64-linux everything is simple and there is just the one
2475 * call, mmap (aka sys_mmap) which takes the arguments in the
2476 * normal way and the offset in bytes.
2477 *
2478 * - On s390x-linux there is mmap (aka old_mmap) which takes the
2479 * arguments in a memory block and the offset in bytes. mmap2
2480 * is also available (but not exported via unistd.h) with
2481 * arguments in a memory block and the offset in pages.
2482 *
2483 * To cope with all this we provide a generic handler function here
2484 * and then each platform implements one or more system call handlers
2485 * which call this generic routine after extracting and normalising
2486 * the arguments.
2487 */
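
/* Purely for illustration (a hypothetical wrapper, not part of this file):
   a platform whose mmap2 passes the offset in pages might normalise it to
   bytes and delegate to the generic handler below roughly like this:

      PRE(sys_mmap2)
      {
         SysRes r;
         // PRINT / PRE_REG_READ6 boilerplate elided
         r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                        (Off64T)ARG6 * VKI_PAGE_SIZE );
         SET_STATUS_from_SysRes(r);
      }

   An ABI whose mmap already takes a byte offset would simply pass ARG6
   through unchanged. */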
2488
2489 SysRes
2490 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
2491 UWord arg1, UWord arg2, UWord arg3,
2492 UWord arg4, UWord arg5, Off64T arg6 )
2493 {
2494 Addr advised;
2495 SysRes sres;
2496 MapRequest mreq;
2497 Bool mreq_ok;
2498
2499 # if defined(VGO_darwin)
2500 // Nb: we can't use this on Darwin, it has races:
2501 // * needs to RETRY if advisory succeeds but map fails
2502 // (could have been some other thread in a nonblocking call)
2503 // * needs to not use fixed-position mmap() on Darwin
2504 // (mmap will cheerfully smash whatever's already there, which might
2505 // be a new mapping from some other thread in a nonblocking call)
2506 VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
2507 # endif
2508
2509 if (arg2 == 0) {
2510 /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
2511 shall be established. */
2512 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2513 }
2514
2515 if (!VG_IS_PAGE_ALIGNED(arg1)) {
2516 /* zap any misaligned addresses. */
2517 /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
2518 to fail. Here, we catch them all. */
2519 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2520 }
2521
2522 if (!VG_IS_PAGE_ALIGNED(arg6)) {
2523 /* zap any misaligned offsets. */
2524 /* SuSV3 says: The off argument is constrained to be aligned and
2525 sized according to the value returned by sysconf() when
2526 passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
2527 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2528 }
2529
2530 /* Figure out what kind of allocation constraints there are
2531 (fixed/hint/any), and ask aspacem what we should do. */
2532 mreq.start = arg1;
2533 mreq.len = arg2;
2534 if (arg4 & VKI_MAP_FIXED) {
2535 mreq.rkind = MFixed;
2536 } else
2537 #if defined(VKI_MAP_ALIGN) /* Solaris specific */
2538 if (arg4 & VKI_MAP_ALIGN) {
2539 mreq.rkind = MAlign;
2540 if (mreq.start == 0) {
2541 mreq.start = VKI_PAGE_SIZE;
2542 }
2543 /* VKI_MAP_FIXED and VKI_MAP_ALIGN don't like each other. */
2544 arg4 &= ~VKI_MAP_ALIGN;
2545 } else
2546 #endif
2547 if (arg1 != 0) {
2548 mreq.rkind = MHint;
2549 } else {
2550 mreq.rkind = MAny;
2551 }
2552
2553 /* Enquire ... */
2554 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2555 if (!mreq_ok) {
2556 /* Our request was bounced, so we'd better fail. */
2557 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2558 }
2559
2560 # if defined(VKI_MAP_32BIT)
2561 /* MAP_32BIT is royally unportable, so if the client asks for it, try our
2562 best to make it work (but without complexifying aspacemgr).
2563 If the user requested MAP_32BIT, the mmap-ed space must be in the
2564 first 2GB of the address space. So, return ENOMEM if aspacemgr
2565 advisory is above the first 2GB. If MAP_FIXED is also requested,
2566 MAP_32BIT has to be ignored.
2567 Assumption about aspacemgr behaviour: aspacemgr scans the address space
2568 from low addresses to find a free segment. No special effort is made
2569 to keep the first 2GB 'free' for this MAP_32BIT. So, this will often
2570 fail once the program has already allocated significant memory. */
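   /* Worked example (illustrative numbers): with advised == 0x7FFF0000 and
      a length arg2 of 0x20000, advised + arg2 == 0x80010000, which crosses
      the 2GB boundary, so the check below returns ENOMEM rather than handing
      back memory that violates MAP_32BIT. */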
2571 if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)) {
2572 if (advised + arg2 >= 0x80000000)
2573 return VG_(mk_SysRes_Error)( VKI_ENOMEM );
2574 }
2575 # endif
2576
2577 /* Otherwise we're OK (so far). Install aspacem's choice of
2578 address, and let the mmap go through. */
2579 sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2580 arg4 | VKI_MAP_FIXED,
2581 arg5, arg6);
2582
2583 # if defined(VKI_MAP_32BIT)
2584 /* No recovery trial if the advisory was not accepted. */
2585 if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)
2586 && sr_isError(sres)) {
2587 return VG_(mk_SysRes_Error)( VKI_ENOMEM );
2588 }
2589 # endif
2590
2591 /* A refinement: it may be that the kernel refused aspacem's choice
2592 of address. If we were originally asked for a hinted mapping,
2593 there is still a last chance: try again at any address.
2594 Hence: */
2595 if (mreq.rkind == MHint && sr_isError(sres)) {
2596 mreq.start = 0;
2597 mreq.len = arg2;
2598 mreq.rkind = MAny;
2599 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2600 if (!mreq_ok) {
2601 /* Our request was bounced, so we'd better fail. */
2602 return VG_(mk_SysRes_Error)( VKI_EINVAL );
2603 }
2604 /* and try again with the kernel */
2605 sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2606 arg4 | VKI_MAP_FIXED,
2607 arg5, arg6);
2608 }
2609
2610 /* Yet another refinement: sometimes valgrind chooses an address
2611 which is not acceptable to the kernel. This at least happens
2612 when mmap-ing huge pages, using the flag MAP_HUGETLB.
2613 valgrind aspacem does not know about huge pages, and modifying
2614 it to handle huge pages is not straightforward (e.g. need
2615 to understand special file system mount options).
2616 So, let's just redo an mmap, without giving any constraint to
2617 the kernel. If that succeeds, check with aspacem that the returned
2618 address is acceptable.
2619 This gives a similar effect to the user having hinted that
2620 address.
2621 The aspacem state will be correctly updated afterwards.
2622 We cannot, however, do this last refinement when the user asked
2623 for a fixed mapping, as the user asked for a specific address. */
2624 if (sr_isError(sres) && !(arg4 & VKI_MAP_FIXED)) {
2625 advised = 0;
2626 /* try mmap with NULL address and without VKI_MAP_FIXED
2627 to let the kernel decide. */
2628 sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2629 arg4,
2630 arg5, arg6);
2631 if (!sr_isError(sres)) {
2632 /* The kernel is supposed to know what it is doing, but let's
2633 do a last sanity check anyway, as if the chosen address had
2634 been initially hinted by the client. The whole point of this
2635 last try was to allow mmap of huge pages to succeed without
2636 making aspacem understand them, on the other hand the kernel
2637 does not know about valgrind reservations, so this mapping
2638 can end up in free space and reservations. */
2639 mreq.start = (Addr)sr_Res(sres);
2640 mreq.len = arg2;
2641 mreq.rkind = MHint;
2642 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2643 vg_assert(mreq_ok && advised == mreq.start);
2644 }
2645 }
2646
2647 if (!sr_isError(sres)) {
2648 ULong di_handle;
2649 /* Notify aspacem. */
2650 notify_core_of_mmap(
2651 (Addr)sr_Res(sres), /* addr kernel actually assigned */
2652 arg2, /* length */
2653 arg3, /* prot */
2654 arg4, /* the original flags value */
2655 arg5, /* fd */
2656 arg6 /* offset */
2657 );
2658 /* Load symbols? */
2659 di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
2660 False/*allow_SkFileV*/, (Int)arg5 );
2661 /* Notify the tool. */
2662 notify_tool_of_mmap(
2663 (Addr)sr_Res(sres), /* addr kernel actually assigned */
2664 arg2, /* length */
2665 arg3, /* prot */
2666 di_handle /* so the tool can refer to the read debuginfo later,
2667 if it wants. */
2668 );
2669 }
2670
2671 /* Stay sane */
2672 if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
2673 vg_assert(sr_Res(sres) == arg1);
2674
2675 return sres;
2676 }
2677
2678
2679 /* ---------------------------------------------------------------------
2680 The Main Entertainment ... syscall wrappers
2681 ------------------------------------------------------------------ */
2682
2683 /* Note: the PRE() and POST() wrappers are for the actual functions
2684 implementing the system calls in the OS kernel. These mostly have
2685 names like sys_write(); a few have names like old_mmap(). See the
2686 comment for ML_(syscall_table)[] for important info about the __NR_foo
2687 constants and their relationship to the sys_foo() functions.
2688
2689 Some notes about names used for syscalls and args:
2690 - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2691 ambiguity.
2692
2693 - For error messages, we generally use a somewhat generic name
2694 for the syscall (eg. "write" rather than "sys_write"). This should be
2695 good enough for the average user to understand what is happening,
2696 without confusing them with names like "sys_write".
2697
2698 - Also, for error messages the arg names are mostly taken from the man
2699 pages (even though many of those man pages are really for glibc
2700 functions of the same name), rather than from the OS kernel source,
2701 for the same reason -- a user presented with a "bogus foo(bar)" arg
2702 will most likely look at the "foo" man page to see which is the "bar"
2703 arg.
2704
2705 Note that we use our own vki_* types. The one exception is in
2706 PRE_REG_READn calls, where pointer types haven't been changed, because
2707 they don't need to be -- eg. for "foo*" to be used, the type foo need not
2708 be visible.
2709
2710 XXX: some of these are arch-specific, and should be factored out.
2711 */
2712
2713 #define PRE(name) DEFN_PRE_TEMPLATE(generic, name)
2714 #define POST(name) DEFN_POST_TEMPLATE(generic, name)
2715
2716 PRE(sys_exit)
2717 {
2718 ThreadState* tst;
2719 /* simple; just make this thread exit */
2720 PRINT("exit( %ld )", SARG1);
2721 PRE_REG_READ1(void, "exit", int, status);
2722 tst = VG_(get_ThreadState)(tid);
2723 /* Set the thread's status to be exiting, then claim that the
2724 syscall succeeded. */
2725 tst->exitreason = VgSrc_ExitThread;
2726 tst->os_state.exitcode = ARG1;
2727 SET_STATUS_Success(0);
2728 }
2729
2730 PRE(sys_ni_syscall)
2731 {
2732 PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2733 VG_SYSNUM_STRING(SYSNO));
2734 PRE_REG_READ0(long, "ni_syscall");
2735 SET_STATUS_Failure( VKI_ENOSYS );
2736 }
2737
2738 PRE(sys_iopl)
2739 {
2740 PRINT("sys_iopl ( %" FMT_REGWORD "u )", ARG1);
2741 PRE_REG_READ1(long, "iopl", unsigned long, level);
2742 }
2743
2744 PRE(sys_fsync)
2745 {
2746 *flags |= SfMayBlock;
2747 PRINT("sys_fsync ( %" FMT_REGWORD "u )", ARG1);
2748 PRE_REG_READ1(long, "fsync", unsigned int, fd);
2749 }
2750
2751 PRE(sys_fdatasync)
2752 {
2753 *flags |= SfMayBlock;
2754 PRINT("sys_fdatasync ( %" FMT_REGWORD "u )", ARG1);
2755 PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
2756 }
2757
2758 PRE(sys_msync)
2759 {
2760 *flags |= SfMayBlock;
2761 PRINT("sys_msync ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %#"
2762 FMT_REGWORD "x )", ARG1, ARG2, ARG3);
2763 PRE_REG_READ3(long, "msync",
2764 unsigned long, start, vki_size_t, length, int, flags);
2765 PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2766 }
2767
2768 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2769 // versions of LiS (Linux Streams). They are not part of the kernel.
2770 // Therefore, we have to provide this type ourselves, rather than getting it
2771 // from the kernel sources.
2772 struct vki_pmsg_strbuf {
2773 int maxlen; /* no. of bytes in buffer */
2774 int len; /* no. of bytes returned */
2775 vki_caddr_t buf; /* pointer to data */
2776 };
2777 PRE(sys_getpmsg)
2778 {
2779 /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2780 struct vki_pmsg_strbuf *ctrl;
2781 struct vki_pmsg_strbuf *data;
2782 *flags |= SfMayBlock;
2783 PRINT("sys_getpmsg ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x, %#"
2784 FMT_REGWORD "x, %#" FMT_REGWORD "x )", SARG1,
2785 ARG2, ARG3, ARG4, ARG5);
2786 PRE_REG_READ5(int, "getpmsg",
2787 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2788 int *, bandp, int *, flagsp);
2789 ctrl = (struct vki_pmsg_strbuf *)(Addr)ARG2;
2790 data = (struct vki_pmsg_strbuf *)(Addr)ARG3;
2791 if (ctrl && ctrl->maxlen > 0)
2792 PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2793 if (data && data->maxlen > 0)
2794 PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2795 if (ARG4)
2796 PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2797 if (ARG5)
2798 PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
2799 }
2800 POST(sys_getpmsg)
2801 {
2802 struct vki_pmsg_strbuf *ctrl;
2803 struct vki_pmsg_strbuf *data;
2804 vg_assert(SUCCESS);
2805 ctrl = (struct vki_pmsg_strbuf *)(Addr)ARG2;
2806 data = (struct vki_pmsg_strbuf *)(Addr)ARG3;
2807 if (RES == 0 && ctrl && ctrl->len > 0) {
2808 POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2809 }
2810 if (RES == 0 && data && data->len > 0) {
2811 POST_MEM_WRITE( (Addr)data->buf, data->len);
2812 }
2813 }
2814
2815 PRE(sys_putpmsg)
2816 {
2817 /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2818 struct vki_pmsg_strbuf *ctrl;
2819 struct vki_pmsg_strbuf *data;
2820 *flags |= SfMayBlock;
2821 PRINT("sys_putpmsg ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD
2822 "x, %ld, %ld )", SARG1, ARG2, ARG3, SARG4, SARG5);
2823 PRE_REG_READ5(int, "putpmsg",
2824 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2825 int, band, int, flags);
2826 ctrl = (struct vki_pmsg_strbuf *)(Addr)ARG2;
2827 data = (struct vki_pmsg_strbuf *)(Addr)ARG3;
2828 if (ctrl && ctrl->len > 0)
2829 PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2830 if (data && data->len > 0)
2831 PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
2832 }
2833
2834 PRE(sys_getitimer)
2835 {
2836 struct vki_itimerval *value = (struct vki_itimerval*)(Addr)ARG2;
2837 PRINT("sys_getitimer ( %ld, %#" FMT_REGWORD "x )", SARG1, ARG2);
2838 PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2839
2840 PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2841 PRE_timeval_WRITE( "getitimer(&value->it_value)", &(value->it_value));
2842 }
2843
2844 POST(sys_getitimer)
2845 {
2846 if (ARG2 != (Addr)NULL) {
2847 struct vki_itimerval *value = (struct vki_itimerval*)(Addr)ARG2;
2848 POST_timeval_WRITE( &(value->it_interval) );
2849 POST_timeval_WRITE( &(value->it_value) );
2850 }
2851 }
2852
2853 PRE(sys_setitimer)
2854 {
2855 PRINT("sys_setitimer ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
2856 SARG1, ARG2, ARG3);
2857 PRE_REG_READ3(long, "setitimer",
2858 int, which,
2859 struct itimerval *, value, struct itimerval *, ovalue);
2860 if (ARG2 != (Addr)NULL) {
2861 struct vki_itimerval *value = (struct vki_itimerval*)(Addr)ARG2;
2862 PRE_timeval_READ( "setitimer(&value->it_interval)",
2863 &(value->it_interval));
2864 PRE_timeval_READ( "setitimer(&value->it_value)",
2865 &(value->it_value));
2866 }
2867 if (ARG3 != (Addr)NULL) {
2868 struct vki_itimerval *ovalue = (struct vki_itimerval*)(Addr)ARG3;
2869 PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2870 &(ovalue->it_interval));
2871 PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2872 &(ovalue->it_value));
2873 }
2874 }
2875
2876 POST(sys_setitimer)
2877 {
2878 if (ARG3 != (Addr)NULL) {
2879 struct vki_itimerval *ovalue = (struct vki_itimerval*)(Addr)ARG3;
2880 POST_timeval_WRITE( &(ovalue->it_interval) );
2881 POST_timeval_WRITE( &(ovalue->it_value) );
2882 }
2883 }
2884
2885 PRE(sys_chroot)
2886 {
2887 PRINT("sys_chroot ( %#" FMT_REGWORD "x )", ARG1);
2888 PRE_REG_READ1(long, "chroot", const char *, path);
2889 PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
2890 }
2891
2892 PRE(sys_madvise)
2893 {
2894 *flags |= SfMayBlock;
2895 PRINT("sys_madvise ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %ld )",
2896 ARG1, ARG2, SARG3);
2897 PRE_REG_READ3(long, "madvise",
2898 unsigned long, start, vki_size_t, length, int, advice);
2899 }
2900
2901 #if HAVE_MREMAP
2902 PRE(sys_mremap)
2903 {
2904 // Nb: this is different to the glibc version described in the man pages,
2905 // which lacks the fifth 'new_address' argument.
2906 if (ARG4 & VKI_MREMAP_FIXED) {
2907 PRINT("sys_mremap ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %"
2908 FMT_REGWORD "u, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
2909 ARG1, ARG2, ARG3, ARG4, ARG5);
2910 PRE_REG_READ5(unsigned long, "mremap",
2911 unsigned long, old_addr, unsigned long, old_size,
2912 unsigned long, new_size, unsigned long, flags,
2913 unsigned long, new_addr);
2914 } else {
2915 PRINT("sys_mremap ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %"
2916 FMT_REGWORD "u, 0x%" FMT_REGWORD "x )",
2917 ARG1, ARG2, ARG3, ARG4);
2918 PRE_REG_READ4(unsigned long, "mremap",
2919 unsigned long, old_addr, unsigned long, old_size,
2920 unsigned long, new_size, unsigned long, flags);
2921 }
2922 SET_STATUS_from_SysRes(
2923 do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
2924 );
2925 }
2926 #endif /* HAVE_MREMAP */
2927
2928 PRE(sys_nice)
2929 {
2930 PRINT("sys_nice ( %ld )", SARG1);
2931 PRE_REG_READ1(long, "nice", int, inc);
2932 }
2933
2934 PRE(sys_mlock2)
2935 {
2936 *flags |= SfMayBlock;
2937 PRINT("sys_mlock2 ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2, ARG3);
2938 PRE_REG_READ3(int, "mlock2", void*, addr, vki_size_t, len, int, flags);
2939 }
2940
2941 PRE(sys_mlock)
2942 {
2943 *flags |= SfMayBlock;
2944 PRINT("sys_mlock ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u )", ARG1, ARG2);
2945 PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2946 }
2947
2948 PRE(sys_munlock)
2949 {
2950 *flags |= SfMayBlock;
2951 PRINT("sys_munlock ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u )", ARG1, ARG2);
2952 PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2953 }
2954
2955 PRE(sys_mlockall)
2956 {
2957 *flags |= SfMayBlock;
2958 PRINT("sys_mlockall ( %" FMT_REGWORD "x )", ARG1);
2959 PRE_REG_READ1(long, "mlockall", int, flags);
2960 }
2961
2962 PRE(sys_setpriority)
2963 {
2964 PRINT("sys_setpriority ( %ld, %ld, %ld )", SARG1, SARG2, SARG3);
2965 PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2966 }
2967
2968 PRE(sys_getpriority)
2969 {
2970 PRINT("sys_getpriority ( %ld, %ld )", SARG1, SARG2);
2971 PRE_REG_READ2(long, "getpriority", int, which, int, who);
2972 }
2973
2974 #if !defined(VGO_freebsd)
2975 PRE(sys_pwrite64)
2976 {
2977 *flags |= SfMayBlock;
2978 #if VG_WORDSIZE == 4
2979 PRINT("sys_pwrite64 ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
2980 FMT_REGWORD "u, %lld )", ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
2981 PRE_REG_READ5(ssize_t, "pwrite64",
2982 unsigned int, fd, const char *, buf, vki_size_t, count,
2983 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2984 #elif VG_WORDSIZE == 8
2985 PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %ld )",
2986 ARG1, ARG2, ARG3, SARG4);
2987 PRE_REG_READ4(ssize_t, "pwrite64",
2988 unsigned int, fd, const char *, buf, vki_size_t, count,
2989 Word, offset);
2990 #else
2991 # error Unexpected word size
2992 #endif
2993 PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
2994 }
2995 #endif
2996
2997 PRE(sys_sync)
2998 {
2999 *flags |= SfMayBlock;
3000 PRINT("sys_sync ( )");
3001 PRE_REG_READ0(long, "sync");
3002 }
3003
3004 #if !defined(VGP_nanomips_linux)
3005 PRE(sys_fstatfs)
3006 {
3007 FUSE_COMPATIBLE_MAY_BLOCK();
3008 PRINT("sys_fstatfs ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
3009 PRE_REG_READ2(long, "fstatfs",
3010 unsigned int, fd, struct statfs *, buf);
3011 PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
3012 }
3013
3014 POST(sys_fstatfs)
3015 {
3016 POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
3017 }
3018
3019 PRE(sys_fstatfs64)
3020 {
3021 FUSE_COMPATIBLE_MAY_BLOCK();
3022 PRINT("sys_fstatfs64 ( %" FMT_REGWORD "u, %" FMT_REGWORD "u, %#"
3023 FMT_REGWORD "x )", ARG1, ARG2, ARG3);
3024 PRE_REG_READ3(long, "fstatfs64",
3025 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
3026 PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
3027 }
3028 POST(sys_fstatfs64)
3029 {
3030 POST_MEM_WRITE( ARG3, ARG2 );
3031 }
3032 #endif
3033
3034 PRE(sys_getsid)
3035 {
3036 PRINT("sys_getsid ( %ld )", SARG1);
3037 PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
3038 }
3039
3040 #if !defined(VGO_freebsd)
3041 PRE(sys_pread64)
3042 {
3043 *flags |= SfMayBlock;
3044 #if VG_WORDSIZE == 4
3045 PRINT("sys_pread64 ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
3046 FMT_REGWORD "u, %lld )", ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
3047 PRE_REG_READ5(ssize_t, "pread64",
3048 unsigned int, fd, char *, buf, vki_size_t, count,
3049 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
3050 #elif VG_WORDSIZE == 8
3051 PRINT("sys_pread64 ( %lu, %#lx, %lu, %ld )",
3052 ARG1, ARG2, ARG3, SARG4);
3053 PRE_REG_READ4(ssize_t, "pread64",
3054 unsigned int, fd, char *, buf, vki_size_t, count,
3055 Word, offset);
3056 #else
3057 # error Unexpected word size
3058 #endif
3059 PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
3060 }
3061 POST(sys_pread64)
3062 {
3063 vg_assert(SUCCESS);
3064 if (RES > 0) {
3065 POST_MEM_WRITE( ARG2, RES );
3066 }
3067 }
3068 #endif
3069
3070 PRE(sys_mknod)
3071 {
3072 FUSE_COMPATIBLE_MAY_BLOCK();
3073 PRINT("sys_mknod ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x, %#"
3074 FMT_REGWORD "x )", ARG1, (HChar*)(Addr)ARG1, ARG2, ARG3 );
3075 PRE_REG_READ3(long, "mknod",
3076 const char *, pathname, int, mode, unsigned, dev);
3077 PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
3078 }
3079
3080 PRE(sys_flock)
3081 {
3082 *flags |= SfMayBlock;
3083 PRINT("sys_flock ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2 );
3084 PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
3085 }
3086
3087 // Pre_read a char** argument.
3088 void ML_(pre_argv_envp)(Addr a, ThreadId tid, const HChar *s1, const HChar *s2)
3089 {
3090 while (True) {
3091 Addr a_deref;
3092 Addr* a_p = (Addr*)a;
3093 PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
3094 a_deref = *a_p;
3095 if (0 == a_deref)
3096 break;
3097 PRE_MEM_RASCIIZ( s2, a_deref );
3098 a += sizeof(char*);
3099 }
3100 }
3101
3102 static Bool i_am_the_only_thread ( void )
3103 {
3104 Int c = VG_(count_living_threads)();
3105 vg_assert(c >= 1); /* stay sane */
3106 return c == 1;
3107 }
3108
3109 /* Wait until all other threads disappear. */
3110 void VG_(reap_threads)(ThreadId self)
3111 {
3112 while (!i_am_the_only_thread()) {
3113 /* Let other thread(s) run */
3114 VG_(vg_yield)();
3115 VG_(poll_signals)(self);
3116 }
3117 vg_assert(i_am_the_only_thread());
3118 }
3119
3120 /* This handles the common part of the PRE macro for execve, execveat and fexecve. */
3121 void handle_pre_sys_execve(ThreadId tid, SyscallStatus *status, Addr pathname,
3122 Addr arg_2, Addr arg_3, ExecveType execveType,
3123 Bool check_pathptr)
3124 {
3125 HChar* path = NULL; /* path to executable */
3126 HChar** envp = NULL;
3127 HChar** argv = NULL;
3128 HChar** arg2copy;
3129 HChar* launcher_basename = NULL;
3130 ThreadState* tst;
3131 Int i, j, tot_args;
3132 SysRes res;
3133 Bool setuid_allowed, trace_this_child;
3134 const char *str;
3135 char str2[30], str3[30];
3136 Addr arg_2_check = arg_2;
3137
3138 switch (execveType) {
3139 case EXECVE:
3140 str = "execve";
3141 break;
3142 case EXECVEAT:
3143 str = "execveat";
3144 break;
3145 case FEXECVE:
3146 str = "fexecve";
3147 break;
3148 default:
3149 vg_assert(False);
3150 }
3151
3152 VG_(strcpy)(str2, str);
3153 VG_(strcpy)(str3, str);
3154
3155 VG_(strcat)(str2, "(argv)");
3156 VG_(strcat)(str3, "(argv[0])");
3157
3158 /* argv[] must be non-NULL and point to addressable memory. */
3159 PRE_MEM_READ(str2, arg_2_check, sizeof(Addr));
3160
3161 /* argv[0] must be non-NULL and point to a valid string. */
3162 if (ML_(safe_to_deref)((HChar **) (Addr)arg_2_check, sizeof(HChar *))) {
3163 Addr argv0 = *(Addr*)arg_2_check;
3164 PRE_MEM_RASCIIZ( str3, argv0 );
3165 /* The rest of argv can be NULL or a valid string pointer. */
3166 if (VG_(am_is_valid_for_client)(arg_2_check, sizeof(HChar), VKI_PROT_READ)) {
3167 arg_2_check += sizeof(HChar*);
3168 str3[VG_(strlen)(str)] = '\0';
3169 VG_(strcat)(str3, "(argv[i])");
3170 ML_(pre_argv_envp)( arg_2_check, tid, str2, str3 );
3171 }
3172 } else {
3173 SET_STATUS_Failure(VKI_EFAULT);
3174 return;
3175 }
3176 // Reset helper strings to syscall name.
3177 str2[VG_(strlen)(str)] = '\0';
3178 str3[VG_(strlen)(str)] = '\0';
3179 if (arg_3 != 0) {
3180 /* At least the terminating NULL must be addressable. */
3181 if (!ML_(safe_to_deref)((HChar **) (Addr)arg_3, sizeof(HChar *))) {
3182 SET_STATUS_Failure(VKI_EFAULT);
3183 return;
3184 }
3185 VG_(strcat)(str2, "(envp)");
3186 VG_(strcat)(str3, "(envp[i])");
3187 ML_(pre_argv_envp)( arg_3, tid, str2, str3 );
3188 }
3189
3190 vg_assert(VG_(is_valid_tid)(tid));
3191 tst = VG_(get_ThreadState)(tid);
3192
3193 /* Erk. If the exec fails, then the following will have made a
3194 mess of things which makes it hard for us to continue. The
3195 right thing to do is piece everything together again in
3196 POST(execve), but that's close to impossible. Instead, we make
3197 an effort to check that the execve will work before actually
3198 doing it. */
3199
3200 /* Check that the name at least begins in client-accessible storage,
3201 unless we created it ourselves in execveat. */
3202 if (check_pathptr
3203 && !VG_(am_is_valid_for_client)( pathname, 1, VKI_PROT_READ )) {
3204 SET_STATUS_Failure( VKI_EFAULT );
3205 return;
3206 }
3207
3208 // debug-only printing
3209 if (0) {
3210 VG_(printf)("pathname = %p(%s)\n", (void*)(Addr)pathname, (HChar*)(Addr)pathname);
3211 if (arg_2) {
3212 VG_(printf)("arg_2 = ");
3213 Int q;
3214 HChar** vec = (HChar**)(Addr)arg_2;
3215 for (q = 0; vec[q]; q++)
3216 VG_(printf)("%p(%s) ", vec[q], vec[q]);
3217 VG_(printf)("\n");
3218 } else {
3219 VG_(printf)("arg_2 = null\n");
3220 }
3221 }
3222
3223 // Decide whether or not we want to follow along
3224 { // Make 'child_argv' be a pointer to the child's arg vector
3225 // (skipping the exe name)
3226 const HChar** child_argv = (const HChar**)(Addr)arg_2;
3227 if (child_argv && child_argv[0] == NULL)
3228 child_argv = NULL;
3229 trace_this_child = VG_(should_we_trace_this_child)( (HChar*)(Addr)pathname,
3230 child_argv );
3231 }
3232
3233 // Do the important checks: it is a file, is executable, permissions are
3234 // ok, etc. We allow setuid executables to run only in the case when
3235 // we are not simulating them, that is, they are to be run natively.
3236 setuid_allowed = trace_this_child ? False : True;
3237 res = VG_(pre_exec_check)((const HChar *)(Addr)pathname, NULL, setuid_allowed);
3238 if (sr_isError(res)) {
3239 SET_STATUS_Failure( sr_Err(res) );
3240 return;
3241 }
3242
3243 /* If we're tracing the child, and the launcher name looks bogus
3244 (possibly because launcher.c couldn't figure it out, see
3245 comments therein) then we have no option but to fail. */
3246 if (trace_this_child
3247 && (VG_(name_of_launcher) == NULL
3248 || VG_(name_of_launcher)[0] != '/')) {
3249 SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
3250 return;
3251 }
3252
3253 /* After this point, we can't recover if the execve fails. */
3254 VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)(Addr)pathname);
3255
3256
3257 // Terminate gdbserver if it is active.
3258 if (VG_(clo_vgdb) != Vg_VgdbNo) {
3259 // If the child will not be traced, we need to terminate gdbserver
3260 // to clean up the gdbserver resources (e.g. the FIFO files).
3261 // If the child will be traced, we also terminate gdbserver: the new
3262 // Valgrind will start a fresh gdbserver after exec.
3263 VG_(gdbserver) (0);
3264 }
3265
3266 /* Resistance is futile. Nuke all other threads. POSIX mandates
3267 this. (Really, nuke them all, since the new process will make
3268 its own new thread.) */
3269 VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
3270 VG_(reap_threads)(tid);
3271
3272 // Set up the child's exe path.
3273 //
3274 if (trace_this_child) {
3275
3276 // We want to exec the launcher. Get its pre-remembered path.
3277 path = VG_(name_of_launcher);
3278 // VG_(name_of_launcher) should have been acquired by m_main at
3279 // startup.
3280 vg_assert(path);
3281
3282 launcher_basename = VG_(strrchr)(path, '/');
3283 if (launcher_basename == NULL || launcher_basename[1] == 0) {
3284 launcher_basename = path; // hmm, tres dubious
3285 } else {
3286 launcher_basename++;
3287 }
3288
3289 } else {
3290 path = (HChar*)(Addr)pathname;
3291 }
3292
3293 // Set up the child's environment.
3294 //
3295 // Remove the valgrind-specific stuff from the environment so the
3296 // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
3297 // This is done unconditionally, since if we are tracing the child,
3298 // the child valgrind will set up the appropriate client environment.
3299 // Nb: we make a copy of the environment before trying to mangle it
3300 // as it might be in read-only memory (this was bug #101881).
3301 //
3302 // Then, if tracing the child, set VALGRIND_LIB for it.
3303 //
3304 if (arg_3 == 0) {
3305 envp = NULL;
3306 } else {
3307 envp = VG_(env_clone)( (HChar**)(Addr)arg_3 );
3308 if (envp == NULL) goto hosed;
3309 VG_(env_remove_valgrind_env_stuff)( envp, True /*ro_strings*/, NULL );
3310 }
3311
3312 if (trace_this_child) {
3313 // Set VALGRIND_LIB in arg_3 (the environment)
3314 VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
3315 }
3316
3317 // Set up the child's args. If not tracing it, they are
3318 // simply arg_2. Otherwise, they are
3319 //
3320 // [launcher_basename] ++ VG_(args_for_valgrind) ++ [pathname] ++ arg_2[1..]
3321 //
3322 // except that the first VG_(args_for_valgrind_noexecpass) args
3323 // are omitted.
3324 //
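   // For example (illustrative values only): if the launcher is
   // "/usr/bin/valgrind", VG_(args_for_valgrind) holds just "--tool=memcheck",
   // none of those args are marked no-exec-pass, and the client calls
   //    execve("/bin/date", {"date", "-u", NULL}, envp)
   // then the argv assembled below is
   //    {"valgrind", "--tool=memcheck", "/bin/date", "-u", NULL}
   // and the binary actually exec'd is the launcher rather than /bin/date.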
3325 if (!trace_this_child) {
3326 argv = (HChar**)(Addr)arg_2;
3327 } else {
3328 vg_assert( VG_(args_for_valgrind) );
3329 vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
3330 vg_assert( VG_(args_for_valgrind_noexecpass)
3331 <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
3332 /* how many args in total will there be? */
3333 // launcher basename
3334 tot_args = 1;
3335 // V's args
3336 tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
3337 tot_args -= VG_(args_for_valgrind_noexecpass);
3338 // name of client exe
3339 tot_args++;
3340 // args for client exe, skipping [0]
3341 arg2copy = (HChar**)(Addr)arg_2;
3342 if (arg2copy && arg2copy[0]) {
3343 for (i = 1; arg2copy[i]; i++)
3344 tot_args++;
3345 }
3346 // allocate
3347 argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
3348 (tot_args+1) * sizeof(HChar*) );
3349 // copy
3350 j = 0;
3351 argv[j++] = launcher_basename;
3352 for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
3353 if (i < VG_(args_for_valgrind_noexecpass))
3354 continue;
3355 argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
3356 }
3357 argv[j++] = (HChar*)(Addr)pathname;
3358 if (arg2copy && arg2copy[0])
3359 for (i = 1; arg2copy[i]; i++)
3360 argv[j++] = arg2copy[i];
3361 argv[j++] = NULL;
3362 // check
3363 vg_assert(j == tot_args+1);
3364 }
3365
3366 /*
3367 Set the signal state up for exec.
3368
3369 We need to set the real signal state to make sure the exec'd
3370 process gets SIG_IGN properly.
3371
3372 Also set our real sigmask to match the client's sigmask so that
3373 the exec'd child will get the right mask. First we need to
3374 clear out any pending signals so they don't get delivered,
3375 which would confuse things.
3376
3377 XXX This is a bug - the signals should remain pending, and be
3378 delivered to the new process after exec. There's also a
3379 race-condition, since if someone delivers us a signal between
3380 the sigprocmask and the execve, we'll still get the signal. Oh
3381 well.
3382 */
3383 {
3384 vki_sigset_t allsigs;
3385 vki_siginfo_t info;
3386
3387 /* What this loop does: it queries SCSS (the signal state that
3388 the client _thinks_ the kernel is in) by calling
3389 VG_(do_sys_sigaction), and modifies the real kernel signal
3390 state accordingly. */
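      /* Concretely (illustrative): a signal the client set to SIG_IGN is
         kept ignored -- POSIX says ignored dispositions survive exec --
         while any signal with a client-installed handler is reset to
         SIG_DFL, since handler addresses are meaningless in the new
         image. */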
3391 for (i = 1; i < VG_(max_signal); i++) {
3392 vki_sigaction_fromK_t sa_f;
3393 vki_sigaction_toK_t sa_t;
3394 VG_(do_sys_sigaction)(i, NULL, &sa_f);
3395 VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
3396 if (sa_t.ksa_handler == VKI_SIG_IGN)
3397 VG_(sigaction)(i, &sa_t, NULL);
3398 else {
3399 sa_t.ksa_handler = VKI_SIG_DFL;
3400 VG_(sigaction)(i, &sa_t, NULL);
3401 }
3402 }
3403
3404 VG_(sigfillset)(&allsigs);
3405 while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
3406 ;
3407
3408 VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
3409 }
3410
3411 if (0) {
3412 HChar **cpp;
3413 VG_(printf)("exec: %s\n", path);
3414 for (cpp = argv; cpp && *cpp; cpp++)
3415 VG_(printf)("argv: %s\n", *cpp);
3416 if (0)
3417 for (cpp = envp; cpp && *cpp; cpp++)
3418 VG_(printf)("env: %s\n", *cpp);
3419 }
3420
3421 // always execute this because it's executing valgrind, not the "target" exe
3422 SET_STATUS_from_SysRes(
3423 VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp));
3424
3425 /* If we got here, then the execve failed. We've already made way
3426 too much of a mess to continue, so we have to abort. */
3427 hosed:
3428 vg_assert(FAILURE);
3429 VG_(message)(Vg_UserMsg, "execve(%#" FMT_REGWORD "x(%s), %#" FMT_REGWORD
3430 "x, %#" FMT_REGWORD "x) failed, errno %lu\n",
3431 pathname, (HChar*)(Addr)pathname, arg_2, arg_3, ERR);
3432 VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
3433 "execve() failing, so I'm dying.\n");
3434 VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
3435 "or work out how to recover.\n");
3436 VG_(exit)(101);
3437
3438 }
3439
3440 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
3441 // but it seems to work nonetheless...
3442 PRE(sys_execve)
3443 {
3444 PRINT("sys_execve ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x, %#"
3445 FMT_REGWORD "x )", ARG1, (HChar*)(Addr)ARG1, ARG2, ARG3);
3446 PRE_REG_READ3(vki_off_t, "execve",
3447 char *, filename, char **, argv, char **, envp);
3448 PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
3449
3450 char *pathname = (char *)ARG1;
3451 Addr arg_2 = (Addr)ARG2;
3452 Addr arg_3 = (Addr)ARG3;
3453
3454 handle_pre_sys_execve(tid, status, (Addr)pathname, arg_2, arg_3, EXECVE, True);
3455 }
3456
3457 PRE(sys_access)
3458 {
3459 PRINT("sys_access ( %#" FMT_REGWORD "x(%s), %ld )", ARG1,
3460 (HChar*)(Addr)ARG1, SARG2);
3461 PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
3462 PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
3463 }
3464
3465 PRE(sys_alarm)
3466 {
3467 PRINT("sys_alarm ( %" FMT_REGWORD "u )", ARG1);
3468 PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
3469 }
3470
3471 PRE(sys_brk)
3472 {
3473 Addr brk_limit = VG_(brk_limit);
3474 Addr brk_new;
3475
3476 /* libc says: int brk(void *end_data_segment);
3477 kernel says: void* brk(void* end_data_segment); (more or less)
3478
3479 libc returns 0 on success, and -1 (and sets errno) on failure.
3480 Nb: if you ask to shrink the dataseg end below what it
3481 currently is, that always succeeds, even if the dataseg end
3482 doesn't actually change (eg. brk(0)). Unless it seg faults.
3483
3484 Kernel returns the new dataseg end. If the brk() failed, this
3485 will be unchanged from the old one. That's why calling (kernel)
3486 brk(0) gives the current dataseg end (libc brk() just returns
3487 zero in that case).
3488
3489 Both will seg fault if you shrink it back into a text segment.
3490 */
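   /* A minimal sketch of the difference (hypothetical user-level code, not
      part of Valgrind): a libc-style brk built directly on the raw kernel
      call could look like

         char *cur = (char *)syscall(__NR_brk, 0);      // query current break
         char *got = (char *)syscall(__NR_brk, cur + 4096);
         int   ok  = (got == cur + 4096);               // else treat as ENOMEM

      i.e. the kernel call always returns the (possibly unchanged) break, and
      the caller decides success by comparing the result with what it asked
      for -- the same brk_new == ARG1 test used below. */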
3491 PRINT("sys_brk ( %#" FMT_REGWORD "x )", ARG1);
3492 PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
3493
3494 brk_new = do_brk(ARG1, tid);
3495 SET_STATUS_Success( brk_new );
3496
3497 if (brk_new == ARG1) {
3498 /* brk() succeeded */
3499 if (brk_new < brk_limit) {
3500 /* successfully shrunk the data segment. */
3501 VG_TRACK( die_mem_brk, (Addr)ARG1,
3502 brk_limit-ARG1 );
3503 } else
3504 if (brk_new > brk_limit) {
3505 /* successfully grew the data segment */
3506 VG_TRACK( new_mem_brk, brk_limit,
3507 ARG1-brk_limit, tid );
3508 }
3509 } else {
3510 /* brk() failed */
3511 vg_assert(brk_limit == brk_new);
3512 }
3513 }
3514
3515 PRE(sys_chdir)
3516 {
3517 FUSE_COMPATIBLE_MAY_BLOCK();
3518 PRINT("sys_chdir ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
3519 PRE_REG_READ1(long, "chdir", const char *, path);
3520 PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
3521 }
3522
3523 PRE(sys_chmod)
3524 {
3525 FUSE_COMPATIBLE_MAY_BLOCK();
3526 PRINT("sys_chmod ( %#" FMT_REGWORD "x(%s), %" FMT_REGWORD "u )", ARG1,
3527 (HChar*)(Addr)ARG1, ARG2);
3528 PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
3529 PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
3530 }
3531
3532 PRE(sys_chown)
3533 {
3534 FUSE_COMPATIBLE_MAY_BLOCK();
3535 PRINT("sys_chown ( %#" FMT_REGWORD "x(%s), 0x%" FMT_REGWORD "x, 0x%"
3536 FMT_REGWORD "x )", ARG1,(char*)(Addr)ARG1,ARG2,ARG3);
3537 PRE_REG_READ3(long, "chown",
3538 const char *, path, vki_uid_t, owner, vki_gid_t, group);
3539 PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
3540 }
3541
3542 PRE(sys_lchown)
3543 {
3544 FUSE_COMPATIBLE_MAY_BLOCK();
3545 PRINT("sys_lchown ( %#" FMT_REGWORD "x(%s), 0x%" FMT_REGWORD "x, 0x%"
3546 FMT_REGWORD "x )", ARG1,(char*)(Addr)ARG1,ARG2,ARG3);
3547 PRE_REG_READ3(long, "lchown",
3548 const char *, path, vki_uid_t, owner, vki_gid_t, group);
3549 PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
3550 }
3551
3552 PRE(sys_close)
3553 {
3554 FUSE_COMPATIBLE_MAY_BLOCK();
3555 PRINT("sys_close ( %" FMT_REGWORD "u )", ARG1);
3556 PRE_REG_READ1(long, "close", unsigned int, fd);
3557
3558 /* Detect and negate attempts by the client to close Valgrind's log fd */
3559 if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
3560 /* If doing -d style logging (which is to fd=2), don't
3561 allow that to be closed either. */
3562 || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
3563 SET_STATUS_Failure( VKI_EBADF );
3564 else {
3565 /* We used to do close tracking in the POST handler, but that is
3566 only called on success. Even if the close syscall fails the
3567 file descriptor is still really closed/invalid. So we do the
3568 recording and checking here. */
3569 if (VG_(clo_track_fds)) ML_(record_fd_close)(tid, ARG1);
3570 }
3571 }
3572
3573 PRE(sys_dup)
3574 {
3575 PRINT("sys_dup ( %" FMT_REGWORD "u )", ARG1);
3576 PRE_REG_READ1(long, "dup", unsigned int, oldfd);
3577 }
3578
3579 POST(sys_dup)
3580 {
3581 vg_assert(SUCCESS);
3582 if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
3583 VG_(close)(RES);
3584 SET_STATUS_Failure( VKI_EMFILE );
3585 } else {
3586 if (VG_(clo_track_fds))
3587 ML_(record_fd_open_named)(tid, RES);
3588 }
3589 }
3590
3591 PRE(sys_dup2)
3592 {
3593 PRINT("sys_dup2 ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
3594 PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
3595 if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
3596 SET_STATUS_Failure( VKI_EBADF );
3597 }
3598
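/* Note (added annotation): unlike dup, the new descriptor (ARG2) was already
   vetted by ML_(fd_allowed) in the PRE handler, so here we only need to
   record the newly opened fd. */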
3599 POST(sys_dup2)
3600 {
3601 vg_assert(SUCCESS);
3602 if (VG_(clo_track_fds))
3603 ML_(record_fd_open_named)(tid, RES);
3604 }
3605
3606 PRE(sys_fchdir)
3607 {
3608 FUSE_COMPATIBLE_MAY_BLOCK();
3609 PRINT("sys_fchdir ( %" FMT_REGWORD "u )", ARG1);
3610 PRE_REG_READ1(long, "fchdir", unsigned int, fd);
3611 }
3612
3613 PRE(sys_fchown)
3614 {
3615 FUSE_COMPATIBLE_MAY_BLOCK();
3616 PRINT("sys_fchown ( %" FMT_REGWORD "u, %" FMT_REGWORD "u, %"
3617 FMT_REGWORD "u )", ARG1, ARG2, ARG3);
3618 PRE_REG_READ3(long, "fchown",
3619 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
3620 }
3621
3622 PRE(sys_fchmod)
3623 {
3624 FUSE_COMPATIBLE_MAY_BLOCK();
3625 PRINT("sys_fchmod ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
3626 PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
3627 }
3628
3629 #if !defined(VGP_nanomips_linux) && !defined (VGO_freebsd)
3630 PRE(sys_newfstat)
3631 {
3632 FUSE_COMPATIBLE_MAY_BLOCK();
3633 PRINT("sys_newfstat ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
3634 PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
3635 PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
3636 }
3637
3638 POST(sys_newfstat)
3639 {
3640 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3641 }
3642 #endif
3643
3644 #if !defined(VGO_solaris) && !defined(VGP_arm64_linux) && \
3645 !defined(VGP_nanomips_linux)
3646 static vki_sigset_t fork_saved_mask;
3647
3648 // In Linux, the sys_fork() function varies across architectures, but we
3649 // ignore the various args it gets, and so it looks arch-neutral. Hmm.
3650 PRE(sys_fork)
3651 {
3652 Bool is_child;
3653 Int child_pid;
3654 vki_sigset_t mask;
3655
3656 PRINT("sys_fork ( )");
3657 PRE_REG_READ0(long, "fork");
3658
3659 /* Block all signals during fork, so that we can fix things up in
3660 the child without being interrupted. */
3661 VG_(sigfillset)(&mask);
3662 VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
3663
3664 VG_(do_atfork_pre)(tid);
3665
3666 SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
3667
3668 if (!SUCCESS) return;
3669
3670 #if defined(VGO_linux) || defined(VGO_freebsd)
3671 // RES is 0 for child, non-0 (the child's PID) for parent.
3672 is_child = ( RES == 0 ? True : False );
3673 child_pid = ( is_child ? -1 : RES );
3674 #elif defined(VGO_darwin)
3675 // RES is the child's pid. RESHI is 1 for child, 0 for parent.
3676 is_child = RESHI;
3677 child_pid = RES;
3678 #else
3679 # error Unknown OS
3680 #endif
3681
3682 if (is_child) {
3683 VG_(do_atfork_child)(tid);
3684
3685 /* restore signal mask */
3686 VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3687 } else {
3688 VG_(do_atfork_parent)(tid);
3689
3690 PRINT(" fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3691
3692 /* restore signal mask */
3693 VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3694 }
3695 }
3696 #endif // !defined(VGO_solaris) && !defined(VGP_arm64_linux) && !defined(VGP_nanomips_linux)
3697
3698 PRE(sys_ftruncate)
3699 {
3700 *flags |= SfMayBlock;
3701 PRINT("sys_ftruncate ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
3702 PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
3703 }
3704
3705 PRE(sys_truncate)
3706 {
3707 *flags |= SfMayBlock;
3708 PRINT("sys_truncate ( %#" FMT_REGWORD "x(%s), %" FMT_REGWORD "u )",
3709 ARG1, (HChar*)(Addr)ARG1, ARG2);
3710 PRE_REG_READ2(long, "truncate",
3711 const char *, path, unsigned long, length);
3712 PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
3713 }
3714
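/* Note (added annotation): on 32-bit platforms the 64-bit length argument
   arrives split across two argument registers and is reassembled with
   MERGE64; on 64-bit platforms it fits in a single word. */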
3715 PRE(sys_ftruncate64)
3716 {
3717 *flags |= SfMayBlock;
3718 #if VG_WORDSIZE == 4
3719 PRINT("sys_ftruncate64 ( %" FMT_REGWORD "u, %llu )", ARG1,
3720 MERGE64(ARG2,ARG3));
3721 PRE_REG_READ3(long, "ftruncate64",
3722 unsigned int, fd,
3723 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3724 #else
3725 PRINT("sys_ftruncate64 ( %lu, %lu )", ARG1, ARG2);
3726 PRE_REG_READ2(long, "ftruncate64",
3727 unsigned int,fd, UWord,length);
3728 #endif
3729 }
3730
3731 PRE(sys_truncate64)
3732 {
3733 *flags |= SfMayBlock;
3734 #if VG_WORDSIZE == 4
3735 PRINT("sys_truncate64 ( %#" FMT_REGWORD "x, %lld )", ARG1,
3736 (Long)MERGE64(ARG2, ARG3));
3737 PRE_REG_READ3(long, "truncate64",
3738 const char *, path,
3739 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3740 #else
3741 PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3742 PRE_REG_READ2(long, "truncate64",
3743 const char *,path, UWord,length);
3744 #endif
3745 PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
3746 }
3747
3748 PRE(sys_getdents)
3749 {
3750 *flags |= SfMayBlock;
3751 PRINT("sys_getdents ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %" FMT_REGWORD
3752 "u )", ARG1, ARG2, ARG3);
3753 PRE_REG_READ3(long, "getdents",
3754 unsigned int, fd, struct vki_dirent *, dirp,
3755 unsigned int, count);
3756 PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
3757 }
3758
3759 POST(sys_getdents)
3760 {
3761 vg_assert(SUCCESS);
3762 if (RES > 0)
3763 POST_MEM_WRITE( ARG2, RES );
3764 }
3765
3766 PRE(sys_getdents64)
3767 {
3768 *flags |= SfMayBlock;
3769 PRINT("sys_getdents64 ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
3770 FMT_REGWORD "u )",ARG1, ARG2, ARG3);
3771 PRE_REG_READ3(long, "getdents64",
3772 unsigned int, fd, struct vki_dirent64 *, dirp,
3773 unsigned int, count);
3774 PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
3775 }
3776
3777 POST(sys_getdents64)
3778 {
3779 vg_assert(SUCCESS);
3780 if (RES > 0)
3781 POST_MEM_WRITE( ARG2, RES );
3782 }
3783
3784 PRE(sys_getgroups)
3785 {
3786 PRINT("sys_getgroups ( %ld, %#" FMT_REGWORD "x )", SARG1, ARG2);
3787 PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3788 if (ARG1 > 0)
3789 PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3790 }
3791
3792 POST(sys_getgroups)
3793 {
3794 vg_assert(SUCCESS);
3795 if (ARG1 > 0 && RES > 0)
3796 POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
3797 }
3798
3799 PRE(sys_getcwd)
3800 {
3801 // Comment from linux/fs/dcache.c:
3802 // NOTE! The user-level library version returns a character pointer.
3803 // The kernel system call just returns the length of the buffer filled
3804 // (which includes the ending '\0' character), or a negative error
3805 // value.
3806 // Is this Linux-specific? If so it should be moved to syswrap-linux.c.
3807 PRINT("sys_getcwd ( %#" FMT_REGWORD "x, %llu )", ARG1,(ULong)ARG2);
3808 PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3809 PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
3810 }
3811
3812 POST(sys_getcwd)
3813 {
3814 vg_assert(SUCCESS);
3815 if (RES != (Addr)NULL)
3816 POST_MEM_WRITE( ARG1, RES );
3817 }
3818
3819 PRE(sys_geteuid)
3820 {
3821 PRINT("sys_geteuid ( )");
3822 PRE_REG_READ0(long, "geteuid");
3823 }
3824
3825 PRE(sys_getegid)
3826 {
3827 PRINT("sys_getegid ( )");
3828 PRE_REG_READ0(long, "getegid");
3829 }
3830
3831 PRE(sys_getgid)
3832 {
3833 PRINT("sys_getgid ( )");
3834 PRE_REG_READ0(long, "getgid");
3835 }
3836
3837 PRE(sys_getpid)
3838 {
3839 PRINT("sys_getpid ()");
3840 PRE_REG_READ0(long, "getpid");
3841 }
3842
3843 PRE(sys_getpgid)
3844 {
3845 PRINT("sys_getpgid ( %ld )", SARG1);
3846 PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3847 }
3848
3849 PRE(sys_getpgrp)
3850 {
3851 PRINT("sys_getpgrp ()");
3852 PRE_REG_READ0(long, "getpgrp");
3853 }
3854
3855 PRE(sys_getppid)
3856 {
3857 PRINT("sys_getppid ()");
3858 PRE_REG_READ0(long, "getppid");
3859 }
3860
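/* Note (added annotation): present the client with the limits Valgrind
   manages on its behalf: the fd limits are reduced because Valgrind reserves
   descriptors near the top of the range for its own use, and the data/stack
   limits are whatever was recorded for the client (see also sys_setrlimit). */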
3861 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3862 {
3863 POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3864
3865 #ifdef _RLIMIT_POSIX_FLAG
3866 // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3867 // Unset it here to make the switch case below work correctly.
3868 a1 &= ~_RLIMIT_POSIX_FLAG;
3869 #endif
3870
3871 switch (a1) {
3872 case VKI_RLIMIT_NOFILE:
3873 ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3874 ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3875 break;
3876
3877 case VKI_RLIMIT_DATA:
3878 *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3879 break;
3880
3881 case VKI_RLIMIT_STACK:
3882 *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
3883 break;
3884 }
3885 }
3886
3887 PRE(sys_old_getrlimit)
3888 {
3889 PRINT("sys_old_getrlimit ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )",
3890 ARG1, ARG2);
3891 PRE_REG_READ2(long, "old_getrlimit",
3892 unsigned int, resource, struct rlimit *, rlim);
3893 PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3894 }
3895
3896 POST(sys_old_getrlimit)
3897 {
3898 common_post_getrlimit(tid, ARG1, ARG2);
3899 }
3900
3901 PRE(sys_getrlimit)
3902 {
3903 PRINT("sys_getrlimit ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
3904 PRE_REG_READ2(long, "getrlimit",
3905 unsigned int, resource, struct rlimit *, rlim);
3906 PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3907 }
3908
3909 POST(sys_getrlimit)
3910 {
3911 common_post_getrlimit(tid, ARG1, ARG2);
3912 }
3913
3914 PRE(sys_getrusage)
3915 {
3916 PRINT("sys_getrusage ( %ld, %#" FMT_REGWORD "x )", SARG1, ARG2);
3917 PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3918 PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
3919 }
3920
3921 POST(sys_getrusage)
3922 {
3923 vg_assert(SUCCESS);
3924 if (RES == 0)
3925 POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
3926 }
3927
3928 PRE(sys_gettimeofday)
3929 {
3930 PRINT("sys_gettimeofday ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
3931 ARG1,ARG2);
3932 PRE_REG_READ2(long, "gettimeofday",
3933 struct timeval *, tv, struct timezone *, tz);
3934 // GrP fixme does darwin write to *tz anymore?
3935 if (ARG1 != 0)
3936 PRE_timeval_WRITE( "gettimeofday(tv)", (Addr)ARG1 );
3937 if (ARG2 != 0)
3938 PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3939 }
3940
3941 POST(sys_gettimeofday)
3942 {
3943 vg_assert(SUCCESS);
3944 if (RES == 0) {
3945 if (ARG1 != 0)
3946 POST_timeval_WRITE( (Addr)ARG1 );
3947 if (ARG2 != 0)
3948 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
3949 }
3950 }
3951
3952 PRE(sys_settimeofday)
3953 {
3954 PRINT("sys_settimeofday ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
3955 ARG1,ARG2);
3956 PRE_REG_READ2(long, "settimeofday",
3957 struct timeval *, tv, struct timezone *, tz);
3958 if (ARG1 != 0)
3959 PRE_timeval_READ( "settimeofday(tv)", (Addr)ARG1 );
3960 if (ARG2 != 0) {
3961 PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3962 /* maybe should warn if tz->tz_dsttime is non-zero? */
3963 }
3964 }
3965
3966 PRE(sys_getuid)
3967 {
3968 PRINT("sys_getuid ( )");
3969 PRE_REG_READ0(long, "getuid");
3970 }
3971
3972 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3973 {
3974 /* We don't have any specific information on it, so
3975 try to do something reasonable based on direction and
3976 size bits. The encoding scheme is described in
3977 /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3978
3979    According to Simon Hausmann, _IOC_READ means the kernel
3980    writes a value back to the ioctl argument passed from user
3981    space, and _IOC_WRITE means the kernel reads it. */
3982
3983 #if defined(VGO_solaris)
3984 /* The majority of Solaris ioctl requests do not honour direction hints. */
3985 UInt dir = _VKI_IOC_NONE;
3986 #else
3987 UInt dir = _VKI_IOC_DIR(request);
3988 #endif
3989 UInt size = _VKI_IOC_SIZE(request);
3990
3991 if (SimHintiS(SimHint_lax_ioctls, VG_(clo_sim_hints))) {
3992 /*
3993 * Be very lax about ioctl handling; the only
3994 * assumption is that the size is correct. Doesn't
3995 * require the full buffer to be initialized when
3996 * writing. Without this, using some device
3997 * drivers with a large number of strange ioctl
3998 * commands becomes very tiresome.
3999 */
4000 } else if (dir == _VKI_IOC_NONE && size > 0) {
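/* Note (added annotation): warn about each distinct direction-less request
   only once, and stop warning altogether once the table below fills up. */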
4001 static UWord unknown_ioctl[10];
4002 static Int moans = sizeof(unknown_ioctl) / sizeof(unknown_ioctl[0]);
4003
4004 if (moans > 0 && !VG_(clo_xml)) {
4005 /* Check that we have not already moaned about this request. */
4006 UInt i;
4007 for (i = 0; i < sizeof(unknown_ioctl)/sizeof(unknown_ioctl[0]); i++) {
4008 if (unknown_ioctl[i] == request)
4009 break;
4010 if (unknown_ioctl[i] == 0) {
4011 unknown_ioctl[i] = request;
4012 moans--;
4013 VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
4014 " with no direction hints.\n", request);
4015 VG_(umsg)(" This could cause spurious value errors to appear.\n");
4016 VG_(umsg)(" See README_MISSING_SYSCALL_OR_IOCTL for "
4017 "guidance on writing a proper wrapper.\n" );
4018 //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
4019 return;
4020 }
4021 }
4022 }
4023 } else {
4024 //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
4025 //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
4026 if ((dir & _VKI_IOC_WRITE) && size > 0)
4027 PRE_MEM_READ( "ioctl(generic)", arg, size);
4028 if ((dir & _VKI_IOC_READ) && size > 0)
4029 PRE_MEM_WRITE( "ioctl(generic)", arg, size);
4030 }
4031 }
4032
4033 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
4034 {
4035 /* We don't have any specific information on it, so
4036 try to do something reasonable based on direction and
4037 size bits. The encoding scheme is described in
4038 /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
4039
4040    According to Simon Hausmann, _IOC_READ means the kernel
4041    writes a value back to the ioctl argument passed from user
4042    space, and _IOC_WRITE means the kernel reads it. */
4043
4044 UInt dir = _VKI_IOC_DIR(request);
4045 UInt size = _VKI_IOC_SIZE(request);
4046 if (size > 0 && (dir & _VKI_IOC_READ)
4047 && res == 0
4048 && arg != (Addr)NULL) {
4049 POST_MEM_WRITE(arg, size);
4050 }
4051 }
4052
4053 /*
4054 If we're sending a SIGKILL to one of our own threads, then simulate
4055 it rather than really sending the signal, so that the target thread
4056 gets a chance to clean up. Returns True if we did the killing (or
4057 no killing is necessary), and False if the caller should use the
4058 normal kill syscall.
4059
4060 "pid" is any pid argument which can be passed to kill; group kills
4061 (< -1, 0), and owner kills (-1) are ignored, on the grounds that
4062 they'll most likely hit all the threads and we won't need to worry
4063 about cleanup. In truth, we can't fully emulate these multicast
4064 kills.
4065
4066 "tgid" is a thread group id. If it is not -1, then the target
4067 thread must be in that thread group.
4068 */
4069 Bool ML_(do_sigkill)(Int pid, Int tgid)
4070 {
4071 ThreadState *tst;
4072 ThreadId tid;
4073
4074 if (pid <= 0)
4075 return False;
4076
4077 tid = VG_(lwpid_to_vgtid)(pid);
4078 if (tid == VG_INVALID_THREADID)
4079 return False; /* none of our threads */
4080
4081 tst = VG_(get_ThreadState)(tid);
4082 if (tst == NULL || tst->status == VgTs_Empty)
4083 return False; /* hm, shouldn't happen */
4084
4085 if (tgid != -1 && tst->os_state.threadgroup != tgid)
4086 return False; /* not the right thread group */
4087
4088 /* Fatal SIGKILL sent to one of our threads.
4089 "Handle" the signal ourselves, as trying to have tid
4090       handle the signal causes termination problems (see #409367
4091 and #409141).
4092 Moreover, as a process cannot do anything when receiving SIGKILL,
4093 it is not particularly crucial that "tid" does the work to
4094 terminate the process. */
4095
4096 if (VG_(clo_trace_signals))
4097 VG_(message)(Vg_DebugMsg,
4098 "Thread %u %s being killed with SIGKILL, running tid: %u\n",
4099 tst->tid, VG_(name_of_ThreadStatus) (tst->status), VG_(running_tid));
4100
4101 if (!VG_(is_running_thread)(tid))
4102 tst = VG_(get_ThreadState)(VG_(running_tid));
4103 VG_(nuke_all_threads_except) (VG_(running_tid), VgSrc_FatalSig);
4104 VG_(reap_threads)(VG_(running_tid));
4105 tst->exitreason = VgSrc_FatalSig;
4106 tst->os_state.fatalsig = VKI_SIGKILL;
4107
4108 return True;
4109 }
4110
4111 PRE(sys_kill)
4112 {
4113 PRINT("sys_kill ( %ld, %ld )", SARG1, SARG2);
4114 PRE_REG_READ2(long, "kill", int, pid, int, signal);
4115 if (!ML_(client_signal_OK)(ARG2)) {
4116 SET_STATUS_Failure( VKI_EINVAL );
4117 return;
4118 }
4119
4120 /* If we're sending SIGKILL, check to see if the target is one of
4121 our threads and handle it specially. */
4122 if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
4123 SET_STATUS_Success(0);
4124 else
4125 /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
4126 affecting how posix-compliant the call is. I guess it is
4127 harmless to pass the 3rd arg on other platforms; hence pass
4128 it on all. */
4129 SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
4130
4131 if (VG_(clo_trace_signals))
4132 VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
4133 SARG2, SARG1);
4134
4135 /* This kill might have given us a pending signal. Ask for a check once
4136 the syscall is done. */
4137 *flags |= SfPollAfter;
4138 }
4139
4140 PRE(sys_link)
4141 {
4142 *flags |= SfMayBlock;
4143 PRINT("sys_link ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x(%s) )", ARG1,
4144 (char*)(Addr)ARG1,ARG2,(char*)(Addr)ARG2);
4145 PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
4146 PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
4147 PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
4148 }
4149
4150 #if !defined(VGP_nanomips_linux) && !defined(VGO_freebsd)
4151 PRE(sys_newlstat)
4152 {
4153 PRINT("sys_newlstat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )", ARG1,
4154 (char*)(Addr)ARG1,ARG2);
4155 PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
4156 PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
4157 PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
4158 }
4159
4160 POST(sys_newlstat)
4161 {
4162 vg_assert(SUCCESS);
4163 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4164 }
4165 #endif
4166
4167 PRE(sys_mkdir)
4168 {
4169 *flags |= SfMayBlock;
4170 PRINT("sys_mkdir ( %#" FMT_REGWORD "x(%s), %ld )", ARG1,
4171 (HChar*)(Addr)ARG1, SARG2);
4172 PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
4173 PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
4174 }
4175
4176 PRE(sys_mprotect)
4177 {
4178 PRINT("sys_mprotect ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %"
4179 FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4180 PRE_REG_READ3(long, "mprotect",
4181 unsigned long, addr, vki_size_t, len, unsigned long, prot);
4182
4183 Addr addr = ARG1;
4184 SizeT len = ARG2;
4185 Int prot = ARG3;
4186
4187 handle_sys_mprotect (tid, status, &addr, &len, &prot);
4188
4189 ARG1 = addr;
4190 ARG2 = len;
4191 ARG3 = prot;
4192 }
4193 /* This will be called from the generic mprotect, or the Linux-specific
4194    pkey_mprotect. Pass pointers to ARG1, ARG2 and ARG3 as addr, len and prot;
4195    they might be adjusted and then have to be assigned back to ARG1, ARG2 and ARG3. */
4196 void handle_sys_mprotect(ThreadId tid, SyscallStatus* status,
4197 Addr *addr, SizeT *len, Int *prot)
4198 {
4199 if (!ML_(valid_client_addr)(*addr, *len, tid, "mprotect")) {
4200 #if defined(VGO_freebsd)
4201 SET_STATUS_Failure( VKI_EINVAL );
4202 #else
4203 SET_STATUS_Failure( VKI_ENOMEM );
4204 #endif
4205 }
4206 #if defined(VKI_PROT_GROWSDOWN)
4207 else
4208 if (*prot & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
4209 /* Deal with mprotects on growable stack areas.
4210
4211 The critical files to understand all this are mm/mprotect.c
4212 in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
4213 glibc.
4214
4215 The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
4216 round the start/end address of mprotect to the start/end of
4217 the underlying vma and glibc uses that as an easy way to
4218 change the protection of the stack by calling mprotect on the
4219 last page of the stack with PROT_GROWSDOWN set.
4220
4221 The sanity check provided by the kernel is that the vma must
4222 have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate. */
4223 UInt grows = *prot & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
4224 NSegment const *aseg = VG_(am_find_nsegment)(*addr);
4225 NSegment const *rseg;
4226
4227 vg_assert(aseg);
4228
4229 if (grows == VKI_PROT_GROWSDOWN) {
4230 rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
4231 if (rseg
4232 && rseg->kind == SkResvn
4233 && rseg->smode == SmUpper
4234 && rseg->end+1 == aseg->start) {
4235 Addr end = *addr + *len;
4236 *addr = aseg->start;
4237 *len = end - aseg->start;
4238 *prot &= ~VKI_PROT_GROWSDOWN;
4239 } else {
4240 SET_STATUS_Failure( VKI_EINVAL );
4241 }
4242 } else if (grows == VKI_PROT_GROWSUP) {
4243 rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
4244 if (rseg
4245 && rseg->kind == SkResvn
4246 && rseg->smode == SmLower
4247 && aseg->end+1 == rseg->start) {
4248 *len = aseg->end - *addr + 1;
4249 *prot &= ~VKI_PROT_GROWSUP;
4250 } else {
4251 SET_STATUS_Failure( VKI_EINVAL );
4252 }
4253 } else {
4254 /* both GROWSUP and GROWSDOWN */
4255 SET_STATUS_Failure( VKI_EINVAL );
4256 }
4257 }
4258 #endif // defined(VKI_PROT_GROWSDOWN)
4259 }
4260
4261 POST(sys_mprotect)
4262 {
4263 Addr a = ARG1;
4264 SizeT len = ARG2;
4265 Int prot = ARG3;
4266
4267 ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
4268 }
4269
4270 PRE(sys_munmap)
4271 {
4272 if (0) VG_(printf)(" munmap( %#" FMT_REGWORD "x )\n", ARG1);
4273 PRINT("sys_munmap ( %#" FMT_REGWORD "x, %llu )", ARG1,(ULong)ARG2);
4274 PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
4275
4276 if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
4277 SET_STATUS_Failure( VKI_EINVAL );
4278 }
4279
4280 POST(sys_munmap)
4281 {
4282 Addr a = ARG1;
4283 SizeT len = ARG2;
4284
4285 ML_(notify_core_and_tool_of_munmap)( a, len );
4286 }
4287
4288 PRE(sys_mincore)
4289 {
4290 PRINT("sys_mincore ( %#" FMT_REGWORD "x, %llu, %#" FMT_REGWORD "x )",
4291 ARG1, (ULong)ARG2, ARG3);
4292 PRE_REG_READ3(long, "mincore",
4293 unsigned long, start, vki_size_t, length,
4294 unsigned char *, vec);
4295 PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
4296 }
4297 POST(sys_mincore)
4298 {
4299 POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
4300 }
4301
4302 PRE(sys_nanosleep)
4303 {
4304 *flags |= SfMayBlock|SfPostOnFail;
4305 PRINT("sys_nanosleep ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )", ARG1,ARG2);
4306 PRE_REG_READ2(long, "nanosleep",
4307 struct timespec *, req, struct timespec *, rem);
4308 PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
4309 if (ARG2 != 0)
4310 PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
4311 }
4312
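/* Note (added annotation): SfPostOnFail was set in the PRE handler because
   the kernel only fills in 'rem' when the sleep was interrupted, i.e. when
   the call fails with EINTR, so this POST must also run on failure. */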
4313 POST(sys_nanosleep)
4314 {
4315 vg_assert(SUCCESS || FAILURE);
4316 if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
4317 POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
4318 }
4319
4320 #if defined(VGO_linux) || defined(VGO_solaris)
4321 /* Handles the case where the open is of /proc/self/auxv or
4322 /proc/<pid>/auxv, and just gives out a copy of the fd for the
4323 fake file we cooked up at startup (in m_main). Also, seeks the
4324 cloned fd back to the start.
4325 Returns True if auxv open was handled (status is set). */
4326 Bool ML_(handle_auxv_open)(SyscallStatus *status, const HChar *filename,
4327 int flags)
4328 {
4329 HChar name[30]; // large enough
4330
4331 if (!ML_(safe_to_deref)((const void *) filename, 1))
4332 return False;
4333
4334 /* Opening /proc/<pid>/auxv or /proc/self/auxv? */
4335 VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
4336 if (!VG_STREQ(filename, name) && !VG_STREQ(filename, "/proc/self/auxv"))
4337 return False;
4338
4340    /* Allow the file to be opened only for reading. */
4340 if (flags & (VKI_O_WRONLY | VKI_O_RDWR)) {
4341 SET_STATUS_Failure(VKI_EACCES);
4342 return True;
4343 }
4344
4345 # if defined(VGO_solaris)
4346 VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_auxv_fd));
4347 SysRes sres = VG_(open)(name, flags, 0);
4348 SET_STATUS_from_SysRes(sres);
4349 # else
4350 SysRes sres = VG_(dup)(VG_(cl_auxv_fd));
4351 SET_STATUS_from_SysRes(sres);
4352 if (!sr_isError(sres)) {
4353 OffT off = VG_(lseek)(sr_Res(sres), 0, VKI_SEEK_SET);
4354 if (off < 0)
4355 SET_STATUS_Failure(VKI_EMFILE);
4356 }
4357 # endif
4358
4359 return True;
4360 }
4361 #endif // defined(VGO_linux) || defined(VGO_solaris)
4362
4363 #if defined(VGO_linux)
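/* Note (added annotation): like ML_(handle_auxv_open), but for
   /proc/self/exe and /proc/<pid>/exe: hand out a duplicate of the fd on the
   client executable opened at startup, rewound to the start.
   Returns True if the open was handled (status is set). */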
4364 Bool ML_(handle_self_exe_open)(SyscallStatus *status, const HChar *filename,
4365 int flags)
4366 {
4367 HChar name[30]; // large enough for /proc/<int>/exe
4368
4369 if (!ML_(safe_to_deref)((const void *) filename, 1))
4370 return False;
4371
4372 /* Opening /proc/<pid>/exe or /proc/self/exe? */
4373 VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
4374 if (!VG_STREQ(filename, name) && !VG_STREQ(filename, "/proc/self/exe"))
4375 return False;
4376
4378    /* Allow the file to be opened only for reading. */
4378 if (flags & (VKI_O_WRONLY | VKI_O_RDWR)) {
4379 SET_STATUS_Failure(VKI_EACCES);
4380 return True;
4381 }
4382
4383 SysRes sres = VG_(dup)(VG_(cl_exec_fd));
4384 SET_STATUS_from_SysRes(sres);
4385 if (!sr_isError(sres)) {
4386 OffT off = VG_(lseek)(sr_Res(sres), 0, VKI_SEEK_SET);
4387 if (off < 0)
4388 SET_STATUS_Failure(VKI_EMFILE);
4389 }
4390
4391 return True;
4392 }
4393 #endif // defined(VGO_linux)
4394
4395 PRE(sys_open)
4396 {
4397 if (ARG2 & VKI_O_CREAT) {
4398 // 3-arg version
4399 PRINT("sys_open ( %#" FMT_REGWORD "x(%s), %ld, %ld )",ARG1,
4400 (HChar*)(Addr)ARG1, SARG2, SARG3);
4401 PRE_REG_READ3(long, "open",
4402 const char *, filename, int, flags, int, mode);
4403 } else {
4404 // 2-arg version
4405 PRINT("sys_open ( %#" FMT_REGWORD "x(%s), %ld )",ARG1,
4406 (HChar*)(Addr)ARG1, SARG2);
4407 PRE_REG_READ2(long, "open",
4408 const char *, filename, int, flags);
4409 }
4410 PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
4411
4412 #if defined(VGO_linux)
4413 /* Handle the case where the open is of /proc/self/cmdline or
4414 /proc/<pid>/cmdline, and just give it a copy of the fd for the
4415 fake file we cooked up at startup (in m_main). Also, seek the
4416 cloned fd back to the start. */
4417 {
4418 HChar name[30]; // large enough
4419 HChar* arg1s = (HChar*) (Addr)ARG1;
4420 SysRes sres;
4421
4422 VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
4423 if (ML_(safe_to_deref)( arg1s, 1 )
4424 && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))) {
4425 sres = VG_(dup)( VG_(cl_cmdline_fd) );
4426 SET_STATUS_from_SysRes( sres );
4427 if (!sr_isError(sres)) {
4428 OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
4429 if (off < 0)
4430 SET_STATUS_Failure( VKI_EMFILE );
4431 }
4432 return;
4433 }
4434 }
4435
4436 /* Handle also the case of /proc/self/auxv or /proc/<pid>/auxv
4437 or /proc/self/exe or /proc/<pid>/exe. */
4438 if (ML_(handle_auxv_open)(status, (const HChar *)(Addr)ARG1, ARG2)
4439 || ML_(handle_self_exe_open)(status, (const HChar *)(Addr)ARG1, ARG2))
4440 return;
4441 #endif // defined(VGO_linux)
4442
4443 /* Otherwise handle normally */
4444 *flags |= SfMayBlock;
4445 }
4446
4447 POST(sys_open)
4448 {
4449 vg_assert(SUCCESS);
4450 if (!ML_(fd_allowed)(RES, "open", tid, True)) {
4451 VG_(close)(RES);
4452 SET_STATUS_Failure( VKI_EMFILE );
4453 } else {
4454 if (VG_(clo_track_fds))
4455 ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)(Addr)ARG1);
4456 }
4457 }
4458
4459 PRE(sys_read)
4460 {
4461 *flags |= SfMayBlock;
4462 PRINT("sys_read ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4463 FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4464 PRE_REG_READ3(ssize_t, "read",
4465 int, fd, char *, buf, vki_size_t, count);
4466
4467 if (!ML_(fd_allowed)(ARG1, "read", tid, False))
4468 SET_STATUS_Failure( VKI_EBADF );
4469 else
4470 PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
4471 }
4472
4473 POST(sys_read)
4474 {
4475 vg_assert(SUCCESS);
4476 POST_MEM_WRITE( ARG2, RES );
4477 }
4478
4479 PRE(sys_write)
4480 {
4481 Bool ok;
4482 *flags |= SfMayBlock;
4483 PRINT("sys_write ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4484 FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4485 PRE_REG_READ3(ssize_t, "write",
4486 unsigned int, fd, const char *, buf, vki_size_t, count);
4487 /* check to see if it is allowed. If not, try for an exemption from
4488 --sim-hints=enable-outer (used for self hosting). */
4489 ok = ML_(fd_allowed)(ARG1, "write", tid, False);
4490 if (!ok && ARG1 == 2/*stderr*/
4491 && SimHintiS(SimHint_enable_outer, VG_(clo_sim_hints)))
4492 ok = True;
4493 #if defined(VGO_solaris)
4494 if (!ok && VG_(vfork_fildes_addr) != NULL
4495 && *VG_(vfork_fildes_addr) >= 0 && *VG_(vfork_fildes_addr) == ARG1)
4496 ok = True;
4497 #endif
4498 if (!ok)
4499 SET_STATUS_Failure( VKI_EBADF );
4500 else
4501 PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
4502 }
4503
4504 PRE(sys_creat)
4505 {
4506 *flags |= SfMayBlock;
4507 PRINT("sys_creat ( %#" FMT_REGWORD "x(%s), %ld )", ARG1,
4508 (HChar*)(Addr)ARG1, SARG2);
4509 PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
4510 PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
4511 }
4512
4513 POST(sys_creat)
4514 {
4515 vg_assert(SUCCESS);
4516 if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
4517 VG_(close)(RES);
4518 SET_STATUS_Failure( VKI_EMFILE );
4519 } else {
4520 if (VG_(clo_track_fds))
4521 ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)(Addr)ARG1);
4522 }
4523 }
4524
4525 PRE(sys_poll)
4526 {
4527 /* struct pollfd {
4528 int fd; -- file descriptor
4529 short events; -- requested events
4530 short revents; -- returned events
4531 };
4532 int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
4533 */
4534 UInt i;
4535 struct vki_pollfd* ufds = (struct vki_pollfd *)(Addr)ARG1;
4536 *flags |= SfMayBlock;
4537 PRINT("sys_poll ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %ld )\n",
4538 ARG1, ARG2, SARG3);
4539 PRE_REG_READ3(long, "poll",
4540 struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
4541
4542 for (i = 0; i < ARG2; i++) {
4543 PRE_MEM_READ( "poll(ufds.fd)",
4544 (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
4545 if (ML_(safe_to_deref)(&ufds[i].fd, sizeof(ufds[i].fd)) && ufds[i].fd >= 0) {
4546 PRE_MEM_READ( "poll(ufds.events)",
4547 (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
4548 }
4549 PRE_MEM_WRITE( "poll(ufds.revents)",
4550 (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4551 }
4552 }
4553
4554 POST(sys_poll)
4555 {
4556 if (SUCCESS) {
4557 UInt i;
4558 struct vki_pollfd* ufds = (struct vki_pollfd *)(Addr)ARG1;
4559 for (i = 0; i < ARG2; i++)
4560 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4561 }
4562 }
4563
4564 PRE(sys_readlink)
4565 {
4566 FUSE_COMPATIBLE_MAY_BLOCK();
4567 Word saved = SYSNO;
4568
4569 PRINT("sys_readlink ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x, %llu )",
4570 ARG1, (char*)(Addr)ARG1, ARG2, (ULong)ARG3);
4571 PRE_REG_READ3(long, "readlink",
4572 const char *, path, char *, buf, int, bufsiz);
4573 PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
4574 PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
4575
4576
4577 {
4578 #if defined(VGO_linux) || defined(VGO_solaris)
4579 #if defined(VGO_linux)
4580 #define PID_EXEPATH "/proc/%d/exe"
4581 #define SELF_EXEPATH "/proc/self/exe"
4582 #define SELF_EXEFD "/proc/self/fd/%d"
4583 #elif defined(VGO_solaris)
4584 #define PID_EXEPATH "/proc/%d/path/a.out"
4585 #define SELF_EXEPATH "/proc/self/path/a.out"
4586 #define SELF_EXEFD "/proc/self/path/%d"
4587 #endif
4588 /*
4589 * Handle the case where readlink is looking at /proc/self/exe or
4590 * /proc/<pid>/exe, or equivalent on Solaris.
4591 */
4592 HChar name[30]; // large enough
4593 HChar* arg1s = (HChar*) (Addr)ARG1;
4594 VG_(sprintf)(name, PID_EXEPATH, VG_(getpid)());
4595 if (ML_(safe_to_deref)(arg1s, 1)
4596 && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, SELF_EXEPATH))) {
4597 VG_(sprintf)(name, SELF_EXEFD, VG_(cl_exec_fd));
4598 SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
4599 ARG2, ARG3));
4600 } else
4601 #endif
4602 {
4603 /* Normal case */
4604 SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
4605 }
4606 }
4607
4608 if (SUCCESS && RES > 0)
4609 POST_MEM_WRITE( ARG2, RES );
4610 }
4611
4612 PRE(sys_readv)
4613 {
4614 Int i;
4615 struct vki_iovec * vec;
4616 char buf[sizeof("readv(vector[])") + 11];
4617 *flags |= SfMayBlock;
4618 PRINT("sys_readv ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4619 FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4620 PRE_REG_READ3(ssize_t, "readv",
4621 unsigned long, fd, const struct iovec *, vector,
4622 unsigned long, count);
4623 if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
4624 SET_STATUS_Failure( VKI_EBADF );
4625 } else {
4626 if ((Int)ARG3 >= 0)
4627 PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
4628
4629 if (ML_(safe_to_deref)((const void*)ARG2, ARG3*sizeof(struct vki_iovec *))) {
4630 vec = (struct vki_iovec *)(Addr)ARG2;
4631 for (i = 0; i < (Int)ARG3; i++) {
4632 VG_(sprintf)(buf, "readv(vector[%d])", i);
4633 PRE_MEM_WRITE(buf, (Addr)vec[i].iov_base, vec[i].iov_len );
4634 }
4635 }
4636 }
4637 }
4638
4639 POST(sys_readv)
4640 {
4641 vg_assert(SUCCESS);
4642 if (RES > 0) {
4643 Int i;
4644 struct vki_iovec * vec = (struct vki_iovec *)(Addr)ARG2;
4645 Int remains = RES;
4646
4647 /* RES holds the number of bytes read. */
4648 for (i = 0; i < (Int)ARG3; i++) {
4649 Int nReadThisBuf = vec[i].iov_len;
4650 if (nReadThisBuf > remains) nReadThisBuf = remains;
4651 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
4652 remains -= nReadThisBuf;
4653 if (remains < 0) VG_(core_panic)("readv: remains < 0");
4654 }
4655 }
4656 }
4657
4658 PRE(sys_rename)
4659 {
4660 FUSE_COMPATIBLE_MAY_BLOCK();
4661 PRINT("sys_rename ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x(%s) )", ARG1,
4662 (char*)(Addr)ARG1,ARG2,(char*)(Addr)ARG2);
4663 PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
4664 PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
4665 PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
4666 }
4667
4668 PRE(sys_rmdir)
4669 {
4670 *flags |= SfMayBlock;
4671 PRINT("sys_rmdir ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
4672 PRE_REG_READ1(long, "rmdir", const char *, pathname);
4673 PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
4674 }
4675
4676 PRE(sys_select)
4677 {
4678 *flags |= SfMayBlock;
4679 PRINT("sys_select ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x, %#"
4680 FMT_REGWORD "x, %#" FMT_REGWORD "x )", SARG1, ARG2, ARG3, ARG4, ARG5);
4681 PRE_REG_READ5(long, "select",
4682 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
4683 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
4684 // XXX: this possibly understates how much memory is read.
4685 if (ARG2 != 0)
4686 PRE_MEM_READ( "select(readfds)",
4687 ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
4688 if (ARG3 != 0)
4689 PRE_MEM_READ( "select(writefds)",
4690 ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
4691 if (ARG4 != 0)
4692 PRE_MEM_READ( "select(exceptfds)",
4693 ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
4694 if (ARG5 != 0)
4695 PRE_timeval_READ( "select(timeout)", (Addr)ARG5 );
4696 }
4697
4698 PRE(sys_setgid)
4699 {
4700 PRINT("sys_setgid ( %" FMT_REGWORD "u )", ARG1);
4701 PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
4702 }
4703
4704 PRE(sys_setsid)
4705 {
4706 PRINT("sys_setsid ( )");
4707 PRE_REG_READ0(long, "setsid");
4708 }
4709
4710 PRE(sys_setgroups)
4711 {
4712 PRINT("setgroups ( %llu, %#" FMT_REGWORD "x )", (ULong)ARG1, ARG2);
4713 PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
4714 if (ARG1 > 0)
4715 PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
4716 }
4717
4718 PRE(sys_setpgid)
4719 {
4720 PRINT("setpgid ( %ld, %ld )", SARG1, SARG2);
4721 PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
4722 }
4723
4724 PRE(sys_setregid)
4725 {
4726 PRINT("sys_setregid ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
4727 PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
4728 }
4729
4730 PRE(sys_setreuid)
4731 {
4732 PRINT("sys_setreuid ( 0x%" FMT_REGWORD "x, 0x%" FMT_REGWORD "x )",
4733 ARG1, ARG2);
4734 PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
4735 }
4736
4737 PRE(sys_setrlimit)
4738 {
4739 UWord arg1 = ARG1;
4740 PRINT("sys_setrlimit ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
4741 PRE_REG_READ2(long, "setrlimit",
4742 unsigned int, resource, struct rlimit *, rlim);
4743 PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
4744
4745 #ifdef _RLIMIT_POSIX_FLAG
4746 // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
4747 // Unset it here to make the if statements below work correctly.
4748 arg1 &= ~_RLIMIT_POSIX_FLAG;
4749 #endif
4750
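/* Note (added annotation): the RLIMIT_NOFILE, RLIMIT_DATA and RLIMIT_STACK
   cases below are handled entirely here and never reach the kernel: Valgrind
   keeps its own record of the limits it presents to the client, and setting
   the status in a PRE handler skips the real syscall. */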
4751 if (!VG_(am_is_valid_for_client)(ARG2, sizeof(struct vki_rlimit),
4752 VKI_PROT_READ)) {
4753 SET_STATUS_Failure( VKI_EFAULT );
4754 }
4755 else if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur
4756 > ((struct vki_rlimit *)(Addr)ARG2)->rlim_max) {
4757 #if defined(VGO_freebsd)
4758 SET_STATUS_Failure( VKI_EPERM );
4759 #else
4760 SET_STATUS_Failure( VKI_EINVAL );
4761 #endif
4762 }
4763 else if (arg1 == VKI_RLIMIT_NOFILE) {
4764 if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
4765 ((struct vki_rlimit *)(Addr)ARG2)->rlim_max != VG_(fd_hard_limit)) {
4766 SET_STATUS_Failure( VKI_EPERM );
4767 }
4768 else {
4769 VG_(fd_soft_limit) = ((struct vki_rlimit *)(Addr)ARG2)->rlim_cur;
4770 SET_STATUS_Success( 0 );
4771 }
4772 }
4773 else if (arg1 == VKI_RLIMIT_DATA) {
4774 if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur
4775 > VG_(client_rlimit_data).rlim_max ||
4776 ((struct vki_rlimit *)(Addr)ARG2)->rlim_max
4777 > VG_(client_rlimit_data).rlim_max) {
4778 SET_STATUS_Failure( VKI_EPERM );
4779 }
4780 else {
4781 VG_(client_rlimit_data) = *(struct vki_rlimit *)(Addr)ARG2;
4782 SET_STATUS_Success( 0 );
4783 }
4784 }
4785 else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
4786 if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur
4787 > VG_(client_rlimit_stack).rlim_max ||
4788 ((struct vki_rlimit *)(Addr)ARG2)->rlim_max
4789 > VG_(client_rlimit_stack).rlim_max) {
4790 SET_STATUS_Failure( VKI_EPERM );
4791 }
4792 else {
4793 /* Change the value of client_stack_szB to the rlim_cur value but
4794 only if it is smaller than the size of the allocated stack for the
4795 client.
4796 TODO: All platforms should set VG_(clstk_max_size) as part of their
4797 setup_client_stack(). */
4798 if ((VG_(clstk_max_size) == 0)
4799 || (((struct vki_rlimit *) (Addr)ARG2)->rlim_cur <= VG_(clstk_max_size)))
4800 VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)(Addr)ARG2)->rlim_cur;
4801
4802 VG_(client_rlimit_stack) = *(struct vki_rlimit *)(Addr)ARG2;
4803 SET_STATUS_Success( 0 );
4804 }
4805 }
4806 }
4807
4808 PRE(sys_setuid)
4809 {
4810 PRINT("sys_setuid ( %" FMT_REGWORD "u )", ARG1);
4811 PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
4812 }
4813
4814 #if !defined(VGP_nanomips_linux) && !defined(VGO_freebsd)
4815 PRE(sys_newstat)
4816 {
4817 FUSE_COMPATIBLE_MAY_BLOCK();
4818 PRINT("sys_newstat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",
4819 ARG1,(char*)(Addr)ARG1,ARG2);
4820 PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
4821 PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
4822 PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
4823 }
4824
4825 POST(sys_newstat)
4826 {
4827 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4828 }
4829 #endif
4830
4831 #if !defined(VGP_nanomips_linux)
4832 PRE(sys_statfs)
4833 {
4834 FUSE_COMPATIBLE_MAY_BLOCK();
4835 PRINT("sys_statfs ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",
4836 ARG1, (char*)(Addr)ARG1, ARG2);
4837 PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
4838 PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
4839 PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
4840 }
4841 POST(sys_statfs)
4842 {
4843 POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
4844 }
4845
4846 PRE(sys_statfs64)
4847 {
4848 PRINT("sys_statfs64 ( %#" FMT_REGWORD "x(%s), %llu, %#" FMT_REGWORD "x )",
4849 ARG1, (char*)(Addr)ARG1, (ULong)ARG2, ARG3);
4850 PRE_REG_READ3(long, "statfs64",
4851 const char *, path, vki_size_t, size, struct statfs64 *, buf);
4852 PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4853 PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4854 }
4855 POST(sys_statfs64)
4856 {
4857 POST_MEM_WRITE( ARG3, ARG2 );
4858 }
4859 #endif
4860
4861 PRE(sys_symlink)
4862 {
4863 *flags |= SfMayBlock;
4864 PRINT("sys_symlink ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x(%s) )",
4865 ARG1, (char*)(Addr)ARG1, ARG2, (char*)(Addr)ARG2);
4866 PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4867 PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4868 PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
4869 }
4870
4871 PRE(sys_time)
4872 {
4873 /* time_t time(time_t *t); */
4874 PRINT("sys_time ( %#" FMT_REGWORD "x )",ARG1);
4875 PRE_REG_READ1(long, "time", int *, t);
4876 if (ARG1 != 0) {
4877 PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4878 }
4879 }
4880
4881 POST(sys_time)
4882 {
4883 if (ARG1 != 0) {
4884 POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
4885 }
4886 }
4887
4888 PRE(sys_times)
4889 {
4890 PRINT("sys_times ( %#" FMT_REGWORD "x )", ARG1);
4891 PRE_REG_READ1(long, "times", struct tms *, buf);
4892 if (ARG1 != 0) {
4893 PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4894 }
4895 }
4896
4897 POST(sys_times)
4898 {
4899 if (ARG1 != 0) {
4900 POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4901 }
4902 }
4903
4904 PRE(sys_umask)
4905 {
4906 PRINT("sys_umask ( %ld )", SARG1);
4907 PRE_REG_READ1(long, "umask", int, mask);
4908 }
4909
4910 PRE(sys_unlink)
4911 {
4912 *flags |= SfMayBlock;
4913 PRINT("sys_unlink ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
4914 PRE_REG_READ1(long, "unlink", const char *, pathname);
4915 PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4916 }
4917
4918 #if !defined(VGO_freebsd)
4919 PRE(sys_newuname)
4920 {
4921 PRINT("sys_newuname ( %#" FMT_REGWORD "x )", ARG1);
4922 PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4923 PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4924 }
4925
4926 POST(sys_newuname)
4927 {
4928 if (ARG1 != 0) {
4929 POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
4930 }
4931 }
4932 #endif
4933
4934 PRE(sys_waitpid)
4935 {
4936 *flags |= SfMayBlock;
4937 PRINT("sys_waitpid ( %ld, %#" FMT_REGWORD "x, %ld )", SARG1, ARG2, SARG3);
4938 PRE_REG_READ3(long, "waitpid",
4939 vki_pid_t, pid, unsigned int *, status, int, options);
4940
4941 if (ARG2 != (Addr)NULL)
4942 PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4943 }
4944
4945 POST(sys_waitpid)
4946 {
4947 if (ARG2 != (Addr)NULL)
4948 POST_MEM_WRITE( ARG2, sizeof(int) );
4949 }
4950
4951 PRE(sys_wait4)
4952 {
4953 *flags |= SfMayBlock;
4954 PRINT("sys_wait4 ( %ld, %#" FMT_REGWORD "x, %ld, %#" FMT_REGWORD "x )",
4955 SARG1, ARG2, SARG3, ARG4);
4956
4957 PRE_REG_READ4(long, "wait4",
4958 vki_pid_t, pid, unsigned int *, status, int, options,
4959 struct rusage *, rusage);
4960 if (ARG2 != (Addr)NULL)
4961 PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4962 if (ARG4 != (Addr)NULL)
4963 PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4964 }
4965
4966 POST(sys_wait4)
4967 {
4968 if (ARG2 != (Addr)NULL)
4969 POST_MEM_WRITE( ARG2, sizeof(int) );
4970 if (ARG4 != (Addr)NULL)
4971 POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
4972 }
4973
4974 PRE(sys_writev)
4975 {
4976 Int i;
4977 struct vki_iovec * vec;
4978 char buf[sizeof("writev(vector[])") + 11];
4979 *flags |= SfMayBlock;
4980 PRINT("sys_writev ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4981 FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4982 PRE_REG_READ3(ssize_t, "writev",
4983 unsigned long, fd, const struct iovec *, vector,
4984 unsigned long, count);
4985 if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4986 SET_STATUS_Failure( VKI_EBADF );
4987 } else {
4988 if ((Int)ARG3 >= 0)
4989 PRE_MEM_READ( "writev(vector)",
4990 ARG2, ARG3 * sizeof(struct vki_iovec) );
4991
4992 if (ML_(safe_to_deref)((const void*)ARG2, ARG3*sizeof(struct vki_iovec *))) {
4993 vec = (struct vki_iovec *)(Addr)ARG2;
4994 for (i = 0; i < (Int)ARG3; i++) {
4995 VG_(sprintf)(buf, "writev(vector[%d])", i);
4996 PRE_MEM_READ( buf, (Addr)vec[i].iov_base, vec[i].iov_len );
4997 }
4998 }
4999 }
5000 }
5001
5002 PRE(sys_utimes)
5003 {
5004 FUSE_COMPATIBLE_MAY_BLOCK();
5005 PRINT("sys_utimes ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",
5006 ARG1, (char*)(Addr)ARG1, ARG2);
5007 PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
5008 PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
5009 if (ARG2 != 0) {
5010 PRE_timeval_READ( "utimes(tvp[0])", (Addr)ARG2 );
5011 PRE_timeval_READ( "utimes(tvp[1])",
5012 (Addr)ARG2+sizeof(struct vki_timeval) );
5013 }
5014 }
5015
5016 PRE(sys_acct)
5017 {
5018 PRINT("sys_acct ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
5019 PRE_REG_READ1(long, "acct", const char *, filename);
5020 PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
5021 }
5022
5023 PRE(sys_pause)
5024 {
5025 *flags |= SfMayBlock;
5026 PRINT("sys_pause ( )");
5027 PRE_REG_READ0(long, "pause");
5028 }
5029
5030 PRE(sys_sigaltstack)
5031 {
5032 PRINT("sigaltstack ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",ARG1,ARG2);
5033 PRE_REG_READ2(int, "sigaltstack",
5034 const vki_stack_t *, ss, vki_stack_t *, oss);
5035 if (ARG1 != 0) {
5036 const vki_stack_t *ss = (vki_stack_t *)(Addr)ARG1;
5037 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
5038 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
5039 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
5040 }
5041 if (ARG2 != 0) {
5042 PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
5043 }
5044
5045 /* Be safe. */
5046 if (ARG1 && !ML_(safe_to_deref((void*)(Addr)ARG1, sizeof(vki_stack_t)))) {
5047 SET_STATUS_Failure(VKI_EFAULT);
5048 return;
5049 }
5050 if (ARG2 && !ML_(safe_to_deref((void*)(Addr)ARG2, sizeof(vki_stack_t)))) {
5051 SET_STATUS_Failure(VKI_EFAULT);
5052 return;
5053 }
5054
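/* Note (added annotation): Valgrind delivers signals to the client itself,
   so the alternate signal stack is managed here by VG_(do_sys_sigaltstack)
   rather than by handing the call to the kernel. */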
5055 SET_STATUS_from_SysRes(
5056 VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)(Addr)ARG1,
5057 (vki_stack_t*)(Addr)ARG2)
5058 );
5059 }
5060 POST(sys_sigaltstack)
5061 {
5062 vg_assert(SUCCESS);
5063 if (RES == 0 && ARG2 != 0)
5064 POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
5065 }
5066
5067 PRE(sys_sethostname)
5068 {
5069 PRINT("sys_sethostname ( %#" FMT_REGWORD "x, %ld )", ARG1, SARG2);
5070 PRE_REG_READ2(long, "sethostname", char *, name, int, len);
5071 PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
5072 }
5073
5074 #undef PRE
5075 #undef POST
5076
5077 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
5078
5079 /*--------------------------------------------------------------------*/
5080 /*--- end ---*/
5081 /*--------------------------------------------------------------------*/