Sets screen font and associated rendering information.
.I argp
points to a
-
+.IP
.in +4n
.EX
struct consolefontdesc {
};
.EE
.in
-
+.IP
If necessary, the screen will be appropriately resized, and
.B SIGWINCH
sent to the appropriate processes.
situation as of kernel version 1.1.94;
there are many minor and not-so-minor
differences with earlier versions.)
-
+.PP
Very often, ioctls are introduced for communication between the
kernel and one particular well-known program (fdisk, hdparm, setserial,
tunelp, loadkeys, selection, setfont, etc.), and their behavior will be
changed when required by this particular program.
-
+.PP
Programs using these ioctls will not be portable to other versions
of UNIX, will not work on older versions of Linux, and will not work
on future versions of Linux.
-
+.PP
Use POSIX functions.
.SH SEE ALSO
.BR dumpkeys (1),
.BR mapscrn (8),
.BR resizecons (8),
.BR setfont (8)
-
+.PP
.IR /usr/include/linux/kd.h ,
.I /usr/include/linux/vt.h
.PP
The sole argument to this operation should be a pointer to a single
.IR "struct fsmap_head" ":"
+.PP
.in +4n
-.nf
-
+.EX
struct fsmap {
__u32 fmr_device; /* Device ID */
__u32 fmr_flags; /* Mapping flags */
the mapping search */
struct fsmap fmh_recs[]; /* Returned records */
};
-
-.fi
+.EE
.in
+.PP
The two
.I fmh_keys
array elements specify the lowest and highest reverse-mapping
These flags can be retrieved and modified using two
.BR ioctl (2)
operations:
-
-.nf
+.PP
.in +4n
+.nf
int attr;
fd = open("pathname", ...);
attr |= FS_NOATIME_FL; /* Tweak returned bit mask */
ioctl(fd, FS_IOC_SETFLAGS, &attr); /* Update flags for inode
referred to by \(aqfd\(aq */
-.in
.fi
-
+.in
+.PP
The
.BR lsattr (1)
and
.BR chattr (1)
shell commands provide interfaces to these two operations,
allowing a user to view and modify the inode flags associated with a file.
-
+.PP
The following flags are supported
(shown along with the corresponding letter used to indicate the flag by
.BR lsattr (1)
However,
quite a few ioctls in fact return an output value.
This is not yet indicated below.
-
+.PP
// Main table.
-
+.PP
// <include/asm-i386/socket.h>
.TS
l l l.
0x00008905 SIOCATMAR int *
0x00008906 SIOCGSTAMP timeval *
.TE
-
+.sp 1
// <include/asm-i386/termios.h>
.TS
l l l l.
0x0000545A TIOCSERGETMULTI struct serial_multiport_struct *
0x0000545B TIOCSERSETMULTI const struct serial_multiport_struct *
.TE
-
+.sp 1
// <include/linux/ax25.h>
.TS
l l l l.
l l l.
0x000089E6 SIOCAX25SETPARMS const struct ax25_parms_struct *
.TE
-
+.sp 1
// <include/linux/cdk.h>
.TS
l l l.
0x00007316 STL_BSTOP void
0x00007317 STL_BRESET void
.TE
-
+.sp 1
// <include/linux/cdrom.h>
.TS
l l l.
0x00005315 CDROMREADCOOKED const struct cdrom_msf * // MORE
0x00005316 CDROMSEEK const struct cdrom_msf *
.TE
-
+.sp 1
// <include/linux/cm206.h>
.TS
l l l.
0x00002000 CM206CTL_GET_STAT int
0x00002001 CM206CTL_GET_LAST_STAT int
.TE
-
+.sp 1
// <include/linux/cyclades.h>
.TS
l l l.
0x00435908 CYGETDEFTIMEOUT int *
0x00435909 CYSETDEFTIMEOUT int
.TE
-
+.sp 1
// <include/linux/fd.h>
.TS
l l l.
0x0000001E FDRAWCMD struct floppy_raw_cmd * // MORE // I-O
0x00000028 FDTWADDLE void
.TE
-
+.sp 1
// <include/linux/fs.h>
.TS
l l l l.
0x80047601 FS_IOC32_GETVERSION int *
0x40047602 FS_IOC32_SETVERSION int *
.TE
-
+.sp 1
// <include/linux/hdreg.h>
.TS
l l l l.
0x00000325 HDIO_SET_NOWERR int
0x00000326 HDIO_SET_DMA int
.TE
-
+.sp 1
// <include/linux/if_eql.h>
.TS
l l l l.
0x000089F4 EQL_GETMASTRCFG struct ifreq * // MORE // I-O
0x000089F5 EQL_SETMASTRCFG struct ifreq * // MORE // I-O
.TE
-
+.sp 1
// <include/linux/if_plip.h>
.TS
l l l l.
0x000089F0 SIOCDEVPLIP struct ifreq * // I-O
.TE
-
+.sp 1
// <include/linux/if_ppp.h>
.TS
l l l.
0x0000549E PPPIOCRASYNCMAP const int *
0x0000549F PPPIOCSMAXCID const int *
.TE
-
+.sp 1
// <include/linux/ipx.h>
.TS
l l l.
0x000089E1 SIOCAIPXPRISLT const char *
0x000089E2 SIOCIPXCFGDATA struct ipx_config_data *
.TE
-
+.sp 1
// <include/linux/kd.h>
.TS
l l l.
0x00004B4D KDSETKEYCODE const struct kbkeycode *
0x00004B4E KDSIGACCEPT int
.TE
-
+.sp 1
// <include/linux/lp.h>
.TS
l l l.
0x0000060C LPRESET void
0x0000060D LPGETSTATS struct lp_stats *
.TE
-
+.sp 1
// <include/linux/mroute.h>
.TS
l l l l.
0x000089E0 SIOCGETVIFCNT struct sioc_vif_req * // I-O
0x000089E1 SIOCGETSGCNT struct sioc_sg_req * // I-O
.TE
-
+.sp 1
// <include/linux/msdos_fs.h> see
.BR ioctl_fat (2)
.TS
0x40047211 FAT_IOCTL_SET_ATTRIBUTES const __u32 *
0x80047213 FAT_IOCTL_GET_VOLUME_ID __u32 *
.TE
-
+.sp 1
// <include/linux/mtio.h>
.TS
l l l.
0x80206D04 MTIOCGETCONFIG struct mtconfiginfo *
0x40206D05 MTIOCSETCONFIG const struct mtconfiginfo *
.TE
-
+.sp 1
// <include/linux/netrom.h>
.TS
l l l l.
0x000089E2 SIOCNRDECOBS void
0x000089E3 SIOCNRRTCTL const int *
.TE
-
+.sp 1
// <include/uapi/linux/wireless.h>
.br
// This API is deprecated.
0x00008b35 SIOCGIWENCODEEXT struct iwreq *
0x00008b36 SIOCSIWPMKSA struct iwreq *
.TE
-
+.sp 1
// <include/linux/sbpcd.h>
.TS
l l l.
0x00009000 DDIOCSDBG const int *
0x00005382 CDROMAUDIOBUFSIZ int
.TE
-
+.sp 1
// <include/linux/scc.h>
.TS
l l l l.
0x00005473 TIOCSKISS const struct ioctl_command *
0x00005474 TIOCSCCSTAT struct scc_stat *
.TE
-
+.sp 1
// <include/linux/scsi.h>
.TS
l l l.
l l l l.
0x00005385 SCSI_IOCTL_PROBE_HOST const int * // MORE
.TE
-
+.sp 1
// <include/linux/smb_fs.h>
.TS
l l l.
0x80027501 SMB_IOC_GETMOUNTUID uid_t *
.TE
-
+.sp 1
// <include/uapi/linux/sockios.h> see
.BR netdevice (7)
-
+.PP
.TS
l l l l.
0x0000890B SIOCADDRT const struct rtentry * // MORE
0x00008970 SIOCGIFMAP struct ifreq * // I-O
0x00008971 SIOCSIFMAP const struct ifreq *
.TE
-
+.sp 1
// <include/linux/soundcard.h>
.TS
l l l.
0xC0044D1E SOUND_MIXER_WRITE_LOUD int * // I-O
0xC0044DFF SOUND_MIXER_WRITE_RECSRC int * // I-O
.TE
-
+.sp 1
// <include/linux/timerfd.h> see
.BR timerfd_create (2)
.TS
l l l l.
0x40085400 TFD_IOC_SET_TICKS uint64_t *
.TE
-
+.sp 1
// <include/linux/umsdos_fs.h>
.TS
l l l l.
0x000004DB UMSDOS_DOS_SETUP const struct umsdos_ioctl *
0x000004DC UMSDOS_RENAME_DOS const struct umsdos_ioctl *
.TE
-
+.sp 1
// <include/linux/vt.h>
.TS
l l l.
0x00005609 VT_RESIZE const struct vt_sizes *
0x0000560A VT_RESIZEX const struct vt_consize *
.TE
-
+.sp 1
// More arguments.
Some ioctls take a pointer to a structure which contains additional
pointers.
These are documented here in alphabetical order.
-
+.PP
.B CDROMREADAUDIO
takes an input pointer
.IR "const struct cdrom_read_audio\ *" .
.I buf
field points to an output buffer of length
.IR "nframes\ * CD_FRAMESIZE_RAW" .
-
+.PP
.BR CDROMREADCOOKED ,
.BR CDROMREADMODE1 ,
.BR CDROMREADMODE2 ,
.I data
points to an output buffer of length
.IR length .
-
+.PP
.B GIO_FONTX
and
.B PIO_FONTX
.B GIO_FONTX
and an input buffer for
.BR PIO_FONTX .
-
+.PP
.B GIO_UNIMAP
and
.B PIO_UNIMAP
.B GIO_UNIMAP
and an input buffer for
.BR PIO_UNIMAP .
-
+.PP
KDADDIO, KDDELIO, KDDISABIO, and KDENABIO enable or disable access to
I/O ports.
They are essentially alternate interfaces to 'ioperm'.
-
+.PP
.B KDMAPDISP
and
.B KDUNMAPDISP
enable or disable memory mappings or I/O port access.
They are not implemented in the kernel.
-
+.PP
.B SCSI_IOCTL_PROBE_HOST
takes an input pointer
.IR "const int\ *" ,
It uses the same pointer as an output pointer to a
.I char []
buffer of this length.
-
+.PP
.B SIOCADDRT
and
.B SIOCDELRT
.I ifc_len
bytes, into which the kernel writes a list of type
.IR "struct ifreq []" .
-
+.PP
.B SIOCSIFHWADDR
takes an input pointer whose type depends on the protocol:
.nf
.fi
// Duplicate ioctls
-
+.PP
This list does not include ioctls in the range
.B SIOCDEVPRIVATE
and
.BR pid_namespaces (7)).
The form of the calls is:
.PP
-.nf
.in +4n
+.nf
new_fd = ioctl(fd, request);
.fi
.in
the type of namespace referred to by the file descriptor
.IR fd :
.PP
-.nf
.in +4n
+.nf
nstype = ioctl(fd, NS_GET_NSTYPE);
.fi
.in
of the process that created the user namespace).
The form of the call is:
.PP
-.nf
.in +4n
+.nf
uid_t uid;
ioctl(fd, NS_GET_OWNER_UID, &uid);
.fi
Trying to get the parent of the initial user namespace fails,
since it has no parent:
.PP
-.nf
.in +4n
+.nf
$ \fB./ns_show /proc/self/ns/user p\fP
The parent namespace is outside your namespace scope
-.in
.fi
+.in
.PP
Create a process running
.BR sleep (1)
that resides in new user and UTS namespaces,
and show that the new UTS namespace is associated with the new user namespace:
.PP
-.nf
.in +4n
+.nf
$ \fBunshare \-Uu sleep 1000 &\fP
[1] 23235
$ \fB./ns_show /proc/23235/ns/uts u\fP
Device/Inode of owning user namespace is: [0,3] / 4026532448
$ \fBreadlink /proc/23235/ns/user \fP
user:[4026532448]
-.in
.fi
+.in
.PP
Then show that the parent of the new user namespace in the preceding
example is the initial user namespace:
.PP
-.nf
.in +4n
+.nf
$ \fBreadlink /proc/self/ns/user\fP
user:[4026531837]
$ \fB./ns_show /proc/23235/ns/user p\fP
Device/Inode of parent namespace is: [0,3] / 4026531837
-.in
.fi
+.in
.PP
Start a shell in a new user namespace, and show that from within
this shell, the parent user namespace can't be discovered.
Similarly, the UTS namespace
(which is associated with the initial user namespace)
can't be discovered.
.PP
-.nf
.in +4n
+.nf
$ \fBPS1="sh2$ " unshare \-U bash\fP
sh2$ \fB./ns_show /proc/self/ns/user p\fP
The parent namespace is outside your namespace scope
sh2$ \fB./ns_show /proc/self/ns/uts u\fP
The owning user namespace is outside your namespace scope
-.in
.fi
+.in
.SS Program source
\&
.EX
as described in the program output.
An example run of the program is as follows:
.PP
-.nf
.in +4n
+.nf
$ \fB./a.out\fP
Parent PID is 1144
Parent opened file on FD 3
Child duplicated FD 3 to create FD 5
Compare duplicated FDs in same process:
kcmp(1145, 1145, KCMP_FILE, 3, 5) ==> same
-.in
.fi
+.in
.SS Program source
\&
.EX
argument is an array of
.I kexec_segment
structures which define the kernel layout:
+.PP
.in +4n
-.nf
-
+.EX
struct kexec_segment {
void *buf; /* Buffer in user space */
size_t bufsz; /* Buffer length in user space */
keys used in the Diffie-Hellman calculation,
packaged in a structure of the following form:
.IP
-.nf
.in +4n
+.nf
struct keyctl_dh_params {
int32_t private; /* The local private key */
int32_t prime; /* The prime, known to both parties */
int32_t base; /* The base integer: either a shared
generator or the remote public key */
};
-.in
.fi
+.in
.IP
Each of the three keys specified in this structure must grant the caller
.I read
.BR request_key (2)
to request a key.
.PP
-.nf
.in +4n
+.nf
$ \fBcc \-o key_instantiate key_instantiate.c \-lkeyutils\fP
$ \fBsudo mv /sbin/request\-key /sbin/request\-key.backup\fP
$ \fBsudo cp key_instantiate /sbin/request\-key\fP
$ \fB./t_request_key user mykey somepayloaddata\fP
Key ID is 20d035bf
$ \fBsudo mv /sbin/request\-key.backup /sbin/request\-key\fP
-.in
.fi
+.in
.PP
Looking at the log file created by this program,
we can see the command-line arguments supplied to our example program:
.PP
-.nf
.in +4n
+.nf
$ \fBcat /tmp/key_instantiate.log \fP
Time: Mon Nov 7 13:06:47 2016
Auth key payload: somepayloaddata
Destination keyring: 256e6a6
Auth key description: .request_key_auth;1000;1000;0b010000;20d035bf
-.in
.fi
+.in
.PP
The last few lines of the above output show that the example program
was able to fetch:
and ID
.IR 20d035bf .
.PP
-.nf
.in +4n
+.nf
$ \fBcat /proc/keys | egrep \(aqmykey|256e6a6\(aq\fP
0256e6a6 I\-\-Q\-\-\- 194 perm 3f030000 1000 1000 keyring _ses: 3
20d035bf I\-\-Q\-\-\- 1 perm 3f010000 1000 1000 user mykey: 16
-.in
.fi
+.in
.SS Program source
\&
.EX
.BR AT_EMPTY_PATH ,
like this:
.IP
-.nf
.in +4n
+.nf
linkat(AT_FDCWD, "/proc/self/fd/<fd>", newdirfd,
newname, AT_SYMLINK_FOLLOW);
-.in
.fi
+.in
.PP
Before kernel 2.6.18, the
.I flags
.IR /proc/meminfo .)
Thus, the above two constants are defined as:
.IP
-.nf
.in +4n
+.nf
#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
-.in
.fi
+.in
.IP
The range of huge page sizes that are supported by the system
can be discovered by listing the subdirectories in
The
.I user_desc
structure is defined in \fI<asm/ldt.h>\fP as:
+.PP
.in +4n
-.nf
-
+.EX
struct user_desc {
unsigned int entry_number;
unsigned long base_addr;
The
.I msqid_ds
data structure is defined in \fI<sys/msg.h>\fP as follows:
-.nf
+.PP
.in +4n
-
+.EX
struct msqid_ds {
struct ipc_perm msg_perm; /* Ownership and permissions */
time_t msg_stime; /* Time of last msgsnd(2) */
pid_t msg_lspid; /* PID of last msgsnd(2) */
pid_t msg_lrpid; /* PID of last msgrcv(2) */
};
-.in
.fi
+.in
.PP
The
.I ipc_perm
(the highlighted fields are settable using
.BR IPC_SET ):
.PP
-.nf
.in +4n
+.nf
struct ipc_perm {
key_t __key; /* Key supplied to msgget(2) */
uid_t \fBuid\fP; /* Effective UID of owner */
unsigned short \fBmode\fP; /* Permissions */
unsigned short __seq; /* Sequence number */
};
-.in
.fi
+.in
.PP
Valid values for
.I cmd
if the
.B _GNU_SOURCE
feature test macro is defined:
-.nf
+.IP
.in +4n
-
+.EX
struct msginfo {
int msgpool; /* Size in kibibytes of buffer pool
used to hold message data;
/* Maximum number of segments;
unused within kernel */
};
-
+.EE
.in
-.fi
+.IP
The
.IR msgmni ,
.IR msgmax ,
The set of file descriptors to be monitored is specified in the
.I fds
argument, which is an array of structures of the following form:
+.PP
.in +4n
-.nf
-
+.EX
struct pollfd {
int fd; /* file descriptor */
short events; /* requested events */
short revents; /* returned events */
};
-.in
.fi
+.in
.PP
The caller should specify the number of items in the
.I fds
.BR ppoll ()
will block.
This argument is a pointer to a structure of the following form:
+.PP
.in +4n
-.nf
-
+.EX
struct timespec {
long tv_sec; /* seconds */
long tv_nsec; /* nanoseconds */
module.
The returned buffer is an array of structures of the following form:
.\" ret is set on ENOSPC
+.IP
.in +4n
-.nf
-
+.EX
struct module_symbol {
unsigned long value;
unsigned long name;
.B QM_INFO
Returns miscellaneous information about the indicated module.
The output buffer format is:
+.IP
.in +4n
-.nf
-
+.EX
struct module_info {
unsigned long address;
unsigned long size;
structure defined in
.IR <sys/quota.h>
as follows:
+.IP
.in +4n
-.nf
-
+.EX
/* uint64_t is an unsigned 64\-bit integer;
uint32_t is an unsigned 32\-bit integer */
#define QIF_USAGE (QIF_SPACE | QIF_INODES)
#define QIF_TIMES (QIF_BTIME | QIF_ITIME)
#define QIF_ALL (QIF_LIMITS | QIF_USAGE | QIF_TIMES)
-
-.fi
+.EE
.in
+.IP
The
.I dqb_valid
field is a bit mask that is set to indicate the entries in the
field that is used to return the ID for which
quota information is being returned:
.IP
-.nf
.in +4n
+.nf
struct nextdqblk {
uint64_t dqb_bhardlimit;
uint64_t dqb_bsoftlimit;
uint32_t dqb_valid;
uint32_t dqb_id;
};
-.in
.fi
+.in
.TP
.B Q_SETQUOTA
Set quota information for user or group
This structure is defined in
.IR <sys/quota.h>
as follows:
+.IP
.in +4n
-.nf
-
+.EX
/* uint64_t is an unsigned 64\-bit integer;
uint32_t is an unsigned 32\-bit integer */
#define IIF_IGRACE 2
#define IIF_FLAGS 4
#define IIF_ALL (IIF_BGRACE | IIF_IGRACE | IIF_FLAGS)
-
-.fi
+.EE
.in
+.IP
The
.I dqi_valid
field in the
that contains a combination of the following flags (defined in
.IR <xfs/xqm.h> ):
.IP
-.nf
.in +4n
+.nf
#define XFS_QUOTA_UDQ_ACCT (1<<0) /* User quota
accounting */
#define XFS_QUOTA_UDQ_ENFD (1<<1) /* User quota limits
accounting */
#define XFS_QUOTA_PDQ_ENFD (1<<5) /* Project quota limits
enforcement */
-.in
.fi
+.in
.IP
This operation requires privilege
.RB ( CAP_SYS_ADMIN ).
.I <xfs/xqm.h>
as follows:
.IP
-.nf
.in +4n
+.nf
/* All the blk units are in BBs (Basic Blocks) of
512 bytes. */
int16_t d_padding3; /* Padding - for future use */
char d_padding4[8]; /* Yet more padding */
};
-.in
.fi
+.in
.IP
Unprivileged users may retrieve only their own quotas;
a privileged user
.I fs_quota_stat
structure itself is defined as follows:
.IP
-.nf
.in +4n
+.nf
#define FS_QSTAT_VERSION 1 /* fs_quota_stat.qs_version */
struct fs_qfilestat {
uint16_t qs_bwarnlimit; /* Limit for # of warnings */
uint16_t qs_iwarnlimit; /* Limit for # of warnings */
};
-.in
.fi
+.in
.IP
The
.I id
.I fs_quota_statv
structure itself is defined as follows:
.IP
-.nf
.in +4n
+.nf
#define FS_QSTATV_VERSION1 1 /* fs_quota_statv.qs_version */
struct fs_qfilestatv {
uint16_t qs_iwarnlimit; /* Limit for # of warnings */
uint64_t qs_pad2[8]; /* For future proofing */
};
-.in
.fi
+.in
.IP
The
.I qs_version
The error is supplied in a
.I sock_extended_err
structure:
+.IP
.in +4n
-.nf
-
+.EX
#define SO_EE_ORIGIN_NONE 0
#define SO_EE_ORIGIN_LOCAL 1
#define SO_EE_ORIGIN_ICMP 2
structure to minimize the number of directly supplied arguments.
This structure is defined as follows in
.IR <sys/socket.h> :
+.PP
.in +4n
-.nf
-
+.EX
struct iovec { /* Scatter/gather array items */
void *iov_base; /* Starting address */
size_t iov_len; /* Number of bytes to transfer */
of the control message sequence.
.PP
The messages are of the form:
+.PP
.in +4n
-.nf
-
+.EX
struct cmsghdr {
size_t cmsg_len; /* Data byte count, including header
(type is socklen_t in POSIX) */
.PP
The following snippet periodically generates UDP datagrams
containing a random number:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " while true; do echo $RANDOM > /dev/udp/127.0.0.1/1234; "
.B " sleep 0.25; done"
.fi
.PP
These datagrams are read by the example application, which
can give the following output:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out"
5 messages received
1 11782
.I param
argument, which is a pointer to a structure of the following form:
.PP
-.nf
.in +4n
+.nf
struct sched_param {
...
int sched_priority;
...
};
-.in
.fi
+.in
.PP
In the current implementation, the structure contains only one field,
.IR sched_priority .
and then construct a shell function that looks up system call
numbers on this architecture:
.PP
-.nf
.in +4n
+.nf
$ \fBuname \-m\fP
x86_64
$ \fBsyscall_nr() {
cat /usr/src/linux/arch/x86/syscalls/syscall_64.tbl | \\
awk '$2 != "x32" && $3 == "'$1'" { print $1 }'
}\fP
-.in
.fi
+.in
.PP
When the BPF filter rejects a system call (case [2] above),
it causes the system call to fail with the error number
specified on the command line.
In the experiments shown here, we'll use error number 99:
.PP
-.nf
.in +4n
+.nf
$ \fBerrno 99\fP
EADDRNOTAVAIL 99 Cannot assign requested address
-.in
.fi
+.in
.PP
In the following example, we attempt to run the command
.BR whoami (1),
.BR execve (2)
system call, so that the command is not even executed:
.PP
-.nf
.in +4n
+.nf
$ \fBsyscall_nr execve\fP
59
$ \fB./a.out\fP
AUDIT_ARCH_X86_64: 0xC000003E
$ \fB./a.out 59 0xC000003E 99 /bin/whoami\fP
execv: Cannot assign requested address
-.in
.fi
+.in
.PP
In the next example, the BPF filter rejects the
.BR write (2)
.BR whoami (1)
command is not able to write output:
.PP
-.nf
.in +4n
+.nf
$ \fBsyscall_nr write\fP
1
$ \fB./a.out 1 0xC000003E 99 /bin/whoami\fP
-.in
.fi
+.in
.PP
In the final example,
the BPF filter rejects a system call that is not used by the
.BR whoami (1)
command, so it is able to successfully execute and produce output:
.PP
-.nf
.in +4n
+.nf
$ \fBsyscall_nr preadv\fP
295
$ \fB./a.out 295 0xC000003E 99 /bin/whoami\fP
cecilia
-.in
.fi
+.in
.SS Program source
.EX
#include <errno.h>
(and
.BR epoll (7)):
.PP
-.nf
.in +4n
+.nf
#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP |
POLLERR)
/* Ready for reading */
/* Ready for writing */
#define POLLEX_SET (POLLPRI)
/* Exceptional condition */
-.in
.fi
+.in
.\"
.SS Multithreaded applications
If a file descriptor being monitored by
size_t ss_len; /* Size (in bytes) of object
pointed to by 'ss' */
};
-
-.fi
+.EE
.in
+.PP
This allows the system call to obtain both
a pointer to the signal set and its size,
while allowing for the fact that most architectures
.IR "union semun" .
The \fIcalling program\fP must define this union as follows:
.PP
-.nf
.in +4n
+.nf
union semun {
int val; /* Value for SETVAL */
struct semid_ds *buf; /* Buffer for IPC_STAT, IPC_SET */
struct seminfo *__buf; /* Buffer for IPC_INFO
(Linux-specific) */
};
-.in
.fi
+.in
.PP
The
.I semid_ds
data structure is defined in \fI<sys/sem.h>\fP as follows:
-.nf
+.PP
.in +4n
-
+.EX
struct semid_ds {
struct ipc_perm sem_perm; /* Ownership and permissions */
time_t sem_otime; /* Last semop time */
time_t sem_ctime; /* Last change time */
unsigned long sem_nsems; /* No. of semaphores in set */
};
-.in
.fi
+.in
.PP
The
.I ipc_perm
(the highlighted fields are settable using
.BR IPC_SET ):
.PP
-.nf
.in +4n
+.nf
struct ipc_perm {
key_t __key; /* Key supplied to semget(2) */
uid_t \fBuid\fP; /* Effective UID of owner */
unsigned short \fBmode\fP; /* Permissions */
unsigned short __seq; /* Sequence number */
};
-.in
.fi
+.in
.PP
Valid values for
.I cmd
if the
.B _GNU_SOURCE
feature test macro is defined:
-.nf
+.IP
.in +4n
-
+.EX
struct seminfo {
int semmap; /* Number of entries in semaphore
map; unused within kernel */
int semaem; /* Max. value that can be recorded for
semaphore adjustment (SEM_UNDO) */
};
-
+.EE
.in
-.fi
+.IP
The
.IR semmsl ,
.IR semmns ,
unsigned short sem_num; /* semaphore number */
short sem_op; /* semaphore operation */
short sem_flg; /* operation flags */
-.in
.fi
+.in
.PP
Flags recognized in
.I sem_flg
structure employed by
.BR sendmsg ()
is as follows:
+.PP
.in +4n
-.nf
-
+.EX
struct msghdr {
void *msg_name; /* optional address */
socklen_t msg_namelen; /* size of address */
Both of these system calls take an argument that is a pointer
to a structure of the following type:
.PP
-.nf
.in +4n
+.nf
struct user_desc {
unsigned int entry_number;
unsigned long base_addr;
unsigned int seg_not_present:1;
unsigned int useable:1;
};
-.in
.fi
+.in
.PP
.BR get_thread_area ()
reads the GDT entry indicated by
and then both processes display the hostnames in their UTS namespaces,
so that we can see that they are different.
.PP
-.nf
.in +4n
+.nf
$ \fBsu\fP # Need privilege for namespace operations
Password:
# \fB./newuts bizarro &\fP
uts.nodename in parent: antero
# \fBuname \-n\fP # Verify hostname in the shell
antero
-.in
.fi
+.in
.PP
We then run the program shown below,
using it to execute a shell.
Inside that shell, we verify that the hostname is the one
set by the child created by the first program:
.PP
-.nf
.in +4n
+.nf
# \fB./ns_exec /proc/3550/ns/uts /bin/bash\fP
# \fBuname \-n\fP # Executed in shell started by ns_exec
bizarro
-.in
.fi
+.in
.SS Program source
.EX
#define _GNU_SOURCE
if the
.B _GNU_SOURCE
feature test macro is defined:
-.nf
+.IP
.in +4n
-
+.EX
struct shminfo {
unsigned long shmmax; /* Maximum segment size */
unsigned long shmmin; /* Minimum segment size;
unsigned long shmall; /* Maximum number of pages of
shared memory, system-wide */
};
-
+.EE
.in
-.fi
+.IP
The
.IR shmmni ,
.IR shmmax ,
if the
.B _GNU_SOURCE
feature test macro is defined:
-.nf
+.IP
.in +4n
-
+.EX
struct shm_info {
int used_ids; /* # of currently existing
segments */
unsigned long swap_successes;
/* Unused since Linux 2.4 */
};
-.in
.fi
+.in
.TP
.BR SHM_STAT " (Linux-specific)"
Return a
.in +4
#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
-.in
.fi
+.in
.IP
For some additional details,
see the discussion of the similarly named constants in
field.
This handler takes three arguments, as follows:
.PP
-.nf
.in +4n
+.nf
void
handler(int sig, siginfo_t *info, void *ucontext)
{
...
}
-.in
.fi
+.in
.PP
The
.I siginfo_t
.BR sysinfo ()
returned information in the following structure:
.PP
-.nf
.in +4n
+.nf
struct sysinfo {
long uptime; /* Seconds since boot */
unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
unsigned short procs; /* Number of current processes */
char _f[22]; /* Pads structure to 64 bytes */
};
-.in
.fi
+.in
.PP
In the above structure, the sizes of the memory and swap fields
are given in bytes.
Since Linux 2.3.23 (i386) and Linux 2.3.48
(all architectures) the structure is:
.PP
-.nf
.in +4n
+.nf
struct sysinfo {
long uptime; /* Seconds since boot */
unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
char _f[20\-2*sizeof(long)\-sizeof(int)];
/* Padding to 64 bytes */
};
-.in
.fi
+.in
.PP
In the above structure,
sizes of the memory and swap fields are given as multiples of
after creating a timer that expires at an interval of 100 nanoseconds.
By the time the signal is unblocked and delivered,
there have been around ten million overruns.
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out 1 100\fP
Establishing handler for signal 34
Blocking signal 34
structure used for this argument contains two fields,
each of which is in turn a structure of type
.IR timespec :
+.PP
.in +4n
-.nf
-
+.EX
struct timespec {
time_t tv_sec; /* Seconds */
long tv_nsec; /* Nanoseconds */
The second and third command-line arguments are optional.
.PP
The following shell session demonstrates the use of the program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " a.out 3 1 100"
0.000: timer started
3.000: read: 1; total=1
.I utsname
struct is defined in
.IR <sys/utsname.h> :
+.PP
.in +4n
-.nf
-
+.EX
struct utsname {
char sysname[]; /* Operating system name (e.g., "Linux") */
char nodename[]; /* Name within "some implementation-defined
char domainname[]; /* NIS or YP domain name */
#endif
};
-
-.fi
+.EE
.in
+.PP
The length of the arrays in a
.I struct utsname
is unspecified (see NOTES);
running a shell in a new mount namespace,
and verifying that the original shell and the
new shell are in separate mount namespaces:
+.PP
.in +4n
-.nf
-
+.EX
$ \fBreadlink /proc/$$/ns/mnt\fP
mnt:[4026531840]
$ \fBsudo ./unshare \-m /bin/bash\fP
structures, each of which describes a page-fault event
or an event required for the non-cooperative userfaultfd usage:
.PP
-.nf
.in +4n
+.nf
struct uffd_msg {
__u8 event; /* Type of event */
...
/* Padding fields omitted */
} __packed;
-.in
.fi
+.in
.PP
If multiple events are available and the supplied buffer is large enough,
.BR read (2)
.PP
The following is an example of what we see when running the program:
.PP
-.nf
.in +4n
+.nf
$ \fB./userfaultfd_demo 3\fP
Address returned by mmap() = 0x7fd30106c000
Read address 0x7fd30106e40f in main(): C
Read address 0x7fd30106e80f in main(): C
Read address 0x7fd30106ec0f in main(): C
-.in
.fi
+.in
.SS Program source
\&
.EX
.I ustat
structure that contains the following
members:
+.PP
.in +4n
-.nf
-
+.EX
daddr_t f_tfree; /* Total free blocks */
ino_t f_tinode; /* Number of free inodes */
char f_fname[6]; /* Filsys name */
specifies a time as the number of seconds and nanoseconds
since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
This information is conveyed in a structure of the following form:
+.PP
.in +4n
-.nf
-
+.EX
struct timespec {
time_t tv_sec; /* seconds */
long tv_nsec; /* nanoseconds */
void *iov_base; /* Starting address */
size_t iov_len; /* Number of bytes */
};
-.in
.fi
+.in
.PP
The
.I flags
and uses the W*() macros described above to analyze the wait status value.
.PP
The following shell session demonstrates the use of the program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out &"
Child PID is 32360
[1] 32359
in the structure pointed to by
.IR delta .
This structure has the following form:
+.PP
.in +4n
-.nf
-
+.EX
struct timeval {
time_t tv_sec; /* seconds */
suseconds_t tv_usec; /* microseconds */
.BR backtrace_symbols ().
The following shell session shows what we might see when running the
program:
-.nf
+.PP
.in +4n
-
+.EX
.RB "$" " cc \-rdynamic prog.c \-o prog"
.RB "$" " ./prog 3"
backtrace() returned 8 addresses
\&./prog(main+0x65) [0x80488fb]
\&/lib/libc.so.6(__libc_start_main+0xdc) [0xb7e38f9c]
\&./prog [0x8048711]
-.in
.fi
+.in
.SS Program source
\&
.EX
its command-line argument.
The following shell session demonstrates the use of the program:
.PP
-.nf
.in +4n
+.nf
$ \fB./a.out 0x0123456789abcdef\fP
0x123456789abcdef ==> 0xefcdab8967452301
-.in
.fi
+.in
.SS Program source
\&
.EX
is defined in the
.I <db.h>
include file as follows:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
unsigned long flags;
unsigned int cachesize;
.BR clock_gettime (2)
to obtain the time on that clock.
An example run is the following:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out 1" " # Show CPU clock of init process"
CPU-time clock for PID 1 is 2.213466748 seconds
.fi
The following code fragment determines the path in which to find
the POSIX.2 system utilities:
.br
-.nf
+.PP
.in +4n
-
+.EX
char *pathbuf;
size_t n;
if (pathbuf == NULL)
abort();
confstr(_CS_PATH, pathbuf, n);
-.in
.fi
+.in
.SH SEE ALSO
.BR getconf (1),
.BR sh (1),
.SS Key/data pairs
Access to all file types is based on key/data pairs.
Both keys and data are represented by the following data structure:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
void *data;
size_t size;
field indicates the size of this array.
.PP
These program headers are structures of the following form:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf32_Word p_type; /* Segment type */
Elf32_Off p_offset; /* Segment file offset */
.IR <elf.h>
for further details):
.PP
-.nf
.in +4n
+.nf
#define PT_LOAD 1 /* Loadable program segment */
#define PT_DYNAMIC 2 /* Dynamic linking information */
#define PT_INTERP 3 /* Program interpreter */
#define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */
.\" For PT_GNU_STACK, see http://www.airs.com/blog/archives/518
#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */
-.in
.fi
+.in
.SH RETURN VALUE
The
.BR dl_iterate_phdr ()
The first shared object for which output is displayed
(where the name is an empty string)
is the main program.
-.nf
.in +4n
+.nf
$ \fB./a.out\fP
Name: "" (9 segments)
0: [ 0x400040; memsz: 1f8] flags: 0x5; PT_PHDR
4: [0x7f55716acec4; memsz: 604] flags: 0x4; PT_GNU_EH_FRAME
5: [0x7f557168f000; memsz: 0] flags: 0x6; PT_GNU_STACK
6: [0x7f55718afba0; memsz: 460] flags: 0x4; PT_GNU_RELRO
-.in
.fi
+.in
.PP
.SS Program source
\&
Dl_serpath dls_serpath[1]; /* Actually longer,
'dls_cnt' elements */
} Dl_serinfo;
-
-.fi
+.EE
.in
+.IP
Each of the
.I dls_serpath
elements in the above structure is a structure of the following form:
a string of characters that is converted to uppercase and
displayed on standard output.
An example of its use is the following:
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out abc\fP
ABC
.fi
are the reentrant versions.
They use the following
structure to hold the key data:
+.PP
.in +4n
-.nf
-
+.EX
struct crypt_data {
char keysched[16 * 8];
char sb0[32768];
with an argument of zero to find the current value of the program break.
.SH EXAMPLE
When run, the program below produces output such as the following:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out"
First address past:
program text (etext) 0x8048568
only one of these conversions will have an effect.
When we run this program on a little-endian system such as x86-32,
we see the following:
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out\fP
x.u32 = 0x44332211
htole32(x.u32) = 0x44332211
Display the current
.I errno
information string and exit:
+.PP
.in +4n
-.nf
-
+.EX
p = malloc(size);
if (p == NULL)
err(1, NULL);
.in
.PP
Display an error message and exit:
+.PP
.in +4n
-.nf
-
+.EX
if (tm.tm_hour < START_TIME)
errx(1, "too early, wait until %s", start_time_string);
.fi
.in
.PP
Warn of an error:
+.PP
.in +4n
-.nf
-
+.EX
fd = open(raw_device, O_RDONLY, 0);
if (fd == \-1)
warnx("%s: %s: trying the block device",
Exchange full.
.SH NOTES
A common mistake is to do
+.PP
.in +4n
-.nf
-
+.EX
if (somecall() == \-1) {
printf("somecall() failed\en");
if (errno == ...) { ... }
}
-
-.fi
+.EE
.in
+.PP
where
.I errno
no longer needs to have the value it had upon return from
If the value of
.I errno
should be preserved across a library call, it must be saved:
+.PP
.in +4n
-.nf
-
+.EX
if (somecall() == \-1) {
int errsv = errno;
printf("somecall() failed\en");
first command-line argument) reading integers,
and writes the squares of these integers to the output buffer.
An example of the output produced by this program is the following:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out \(aq1 23 43\(aq"
size=11; ptr=1 529 1849
.fi
argument is a structure that contains four fields pointing to the
programmer-defined hook functions that are used to implement this stream.
The structure is defined as follows:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
cookie_read_function_t *read;
cookie_write_function_t *write;
cookie_seek_function_t *seek;
cookie_close_function_t *close;
} cookie_io_functions_t;
-
-.fi
+.EE
.in
+.PP
The four fields are as follows:
.TP
.I cookie_read_function_t *read
and then seeks through the stream reading two out of every
five characters and writing them to standard output.
The following shell session demonstrates the use of the program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out \(aqhello world\(aq"
/he/
/ w/
/d/
Reached end of file
-
-.fi
+.EE
.in
+.PP
Note that a more general version of the program below
could be improved to more robustly handle various error situations
(e.g., opening a stream with a cookie that already has an open stream;
.SH EXAMPLE
The program below produces results such as the following:
.PP
-.nf
.in +4n
+.nf
.RB "$" " ./a.out 2560"
frexp(2560, &e) = 0.625: 0.625 * 2^12 = 2560
.RB "$" " ./a.out \-4"
frexp(\-4, &e) = \-0.5: \-0.5 * 2^3 = \-4
-.in
.fi
+.in
.SS Program source
\&
.EX
The structure contains at least the following fields
(there are additional fields that
should be considered private to the implementation):
+.PP
.in +4n
-.nf
-
+.EX
typedef struct _ftsent {
unsigned short fts_info; /* flags for FTSENT structure */
char *fts_accpath; /* access path */
supplies when calling
\fIfn\fP()
is a pointer to a structure of type \fIFTW\fP:
+.PP
.in +4n
-.nf
-
+.EX
struct FTW {
int base;
int level;
};
-
-.fi
+.EE
.in
+.PP
.I base
is the offset of the filename (i.e., basename component)
in the pathname given in
char *ai_canonname;
struct addrinfo *ai_next;
};
-.in
.fi
+.in
.PP
The
.I hints
giving a speed-up compared to resolving the hostnames sequentially using
.BR getaddrinfo (3).
The program might be used like this:
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out ftp.us.kernel.org enoent.linuxfoundation.org gnu.cz\fP
ftp.us.kernel.org: 128.30.2.36
enoent.linuxfoundation.org: Name or service not known
The notification facility is not demonstrated.
.PP
An example session might look like this:
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out\fP
> a ftp.us.kernel.org enoent.linuxfoundation.org gnu.cz
> c 2
mcontext_t uc_mcontext;
...
} ucontext_t;
-
-.fi
+.EE
.in
+.PP
with
.IR sigset_t
and
value to be supplied to
.BR getgrouplist ().
The following shell session shows examples of the use of this program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out cecilia 0"
getgrouplist() returned \-1; ngroups = 3
.RB "$" " ./a.out cecilia 3"
and
.BR getnameinfo (3).
Here is what we see when running this program on one system:
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out\fP
lo AF_PACKET (17)
tx_packets = 524; rx_packets = 524
int h_length;
char **h_addr_list;
};
-.in
.fi
+.in
.PP
These functions replace the
.BR gethostbyname (3)
for the supplied buffers,
.I <netdb.h>
defines the constants
+.PP
.in +4n
-.nf
-
+.EX
#define NI_MAXHOST 1025
#define NI_MAXSERV 32
.fi
#include <stdio.h> /* for printf */
#include <stdlib.h> /* for exit */
#include <getopt.h>
-
+.PP
int
main(int argc, char **argv)
{
int c;
int digit_optind = 0;
-
+.PP
while (1) {
int this_option_optind = optind ? optind : 1;
int option_index = 0;
{"file", required_argument, 0, 0 },
{0, 0, 0, 0 }
};
-
+.PP
c = getopt_long(argc, argv, "abc:d:012",
long_options, &option_index);
if (c == \-1)
break;
-
+.PP
switch (c) {
case 0:
printf("option %s", long_options[option_index].name);
printf(" with arg %s", optarg);
printf("\\n");
break;
-
+.PP
case \(aq0\(aq:
case \(aq1\(aq:
case \(aq2\(aq:
digit_optind = this_option_optind;
printf("option %c\\n", c);
break;
-
+.PP
case \(aqa\(aq:
printf("option a\\n");
break;
-
+.PP
case \(aqb\(aq:
printf("option b\\n");
break;
-
+.PP
case \(aqc\(aq:
printf("option c with value \(aq%s\(aq\\n", optarg);
break;
-
+.PP
case \(aqd\(aq:
printf("option d with value \(aq%s\(aq\\n", optarg);
break;
-
+.PP
case \(aq?\(aq:
break;
-
+.PP
default:
printf("?? getopt returned character code 0%o ??\\n", c);
}
}
-
+.PP
if (optind < argc) {
printf("non\-option ARGV\-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\\n");
}
-
+.PP
exit(EXIT_SUCCESS);
}
.EE
.BR ERANGE ,
the program retries with larger buffer sizes.
The following shell session shows a couple of sample runs:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out tcp 1"
ERANGE! Retrying with larger buffer
getprotobyname_r() returned: 0 (success) (buflen=78)
.PP
or, better,
.PP
-.nf
.in +4n
+.nf
int
getpwent_r(struct passwd *pwd, char *buf, int buflen,
FILE **pw_fp);
-.in
.fi
+.in
.SH NOTES
The function
.BR getpwent_r ()
functions each return a pointer to an object with the
following structure containing the broken-out
fields of an entry in the RPC program number data base.
+.PP
.in +4n
-.nf
-
+.EX
struct rpcent {
char *r_name; /* name of server for this RPC program */
char **r_aliases; /* alias list */
.BR ERANGE ,
the program retries with larger buffer sizes.
The following shell session shows a couple of sample runs:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out 7 tcp 1"
ERANGE! Retrying with larger buffer
getservbyport_r() returned: 0 (success) (buflen=87)
The
.I ttyent
structure has the form:
+.PP
.in +4n
-.nf
-
+.EX
struct ttyent {
char *ty_name; /* terminal device name */
char *ty_getty; /* command to execute, usually getty */
.SH DESCRIPTION
This function returns the current file creation mask.
It is equivalent to
+.PP
.in +4n
-.nf
-
+.EX
mode_t getumask(void)
{
mode_t mask = umask( 0 );
umask(mask);
return mask;
}
-
-.fi
+.EE
.in
+.PP
except that it is documented to be thread-safe (that is, shares
a lock with the
.BR umask (2)
.in
.PP
in the shell:
-.nf
+.PP
.in +4n
-
+.EX
glob_t globbuf;
globbuf.gl_offs = 2;
globbuf.gl_pathv[0] = "ls";
globbuf.gl_pathv[1] = "\-l";
execvp("ls", &globbuf.gl_pathv[0]);
-.in
.fi
+.in
.SH SEE ALSO
.BR ls (1),
.BR sh (1),
These functions are glibc-specific.
.SH EXAMPLE
When run, the program below will produce output such as the following:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out"
GNU libc version: 2.8
GNU libc release: stable
is defined in the
.I <db.h>
include file as follows:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
unsigned int bsize;
unsigned int ffactor;
char *key;
void *data;
} ENTRY;
-.in
.fi
+.in
.PP
The field \fIkey\fP points to a null-terminated string which is the
search key.
.BR inet_ntoa ()
is shown below.
Here are some example runs:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out 226.000.000.037" " # Last byte is in octal"
226.0.0.31
.RB "$" " ./a.out 0x7f.1 " " # First byte is in hex"
and
.BR inet_ntop (3).
Here are some example runs:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out i6 0:0:0:0:0:0:0:0"
::
.RB "$" " ./a.out i6 1:0:0:0:0:0:0:8"
The program below demonstrates the use of
.BR insque ().
Here is an example run of the program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$ " "./a.out -c a b c"
Traversing completed list:
a
.IR "unsigned char" ,
as in the following example:
.PP
-.nf
.in +4n
+.nf
char c;
\&...
res = toupper((unsigned char) c);
-.in
.fi
+.in
.PP
This is necessary because
.I char
and
.BR swapcontext ().
Running the program produces the following output:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out"
main: swapcontext(&uctx_main, &uctx_func2)
func2: started
The following shell session shows what happens when we run this program
under glibc, with the default value for
.BR M_CHECK_ACTION :
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out\fP
main(): returned from first free() call
*** glibc detected *** ./a.out: double free or corruption (top): 0x09d30008 ***
.BR mbstowcs (),
as well as some of the wide character classification functions.
An example run is the following:
+.PP
.in +4n
-.nf
-
+.EX
$ ./t_mbstowcs de_DE.UTF\-8 Grüße!
Length of source string (excluding terminator):
8 bytes
with a NULL argument and then frees the same block of memory twice.
The following shell session demonstrates what happens
when running the program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out"
About to free
is not found, the results are unpredictable.
The following call is a fast means of locating a string's
terminating null byte:
+.PP
.in +4n
-.nf
-
+.EX
char *p = rawmemchr(s,\ \(aq\\0\(aq);
.fi
.in
structure in the buffer pointed to by
.IR attr .
This structure is defined as:
+.PP
.in +4n
-.nf
-
+.EX
struct mq_attr {
long mq_flags; /* Flags: 0 or O_NONBLOCK */
long mq_maxmsg; /* Max. # of messages on queue */
the maximum size of messages that the queue will allow.
This structure is defined as follows:
.PP
+.PP
.in +4n
-.nf
-
+.EX
struct mq_attr {
long mq_flags; /* Flags (ignored for mq_open()) */
long mq_maxmsg; /* Max. # of messages on queue */
calloc(16, 16); /* Never freed\-\-a memory leak */
exit(EXIT_SUCCESS);
}
-
-.fi
+.EE
.in
+.PP
When we run the program as follows, we see that
.BR mtrace ()
diagnosed memory leaks at two different locations in the program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$ " "cc \-g t_mtrace.c \-o t_mtrace"
.RB "$ " "export MALLOC_TRACE=/tmp/t"
.RB "$ " "./t_mtrace"
category to
.IR fr_FR
(French):
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out fr_FR\fP
123456,789
Fri Mar 7 00:25:08 2014
category to
.IR it_IT
(Italian):
+.PP
.in +4n
-.nf
-
+.EX
$ \fB./a.out fr_FR it_IT\fP
123456,789
ven 07 mar 2014 00:26:01 CET
(which, here, specify
.IR mi_NZ ,
New Zealand Māori):
+.PP
.in +4n
-.nf
-
+.EX
$ LC_ALL=mi_NZ ./a.out fr_FR ""
123456,789
Te Paraire, te 07 o Poutū-te-rangi, 2014 00:38:44 CET
time_t tv_sec; /* Seconds since the Epoch */
suseconds_t tv_usec; /* Microseconds */
};
-
-.fi
+.EE
.in
+.IP
.TP
.I maxerror
Maximum error, in microseconds.
On a Linux/i386 system, when compiled using the default
.BR gcc (1)
options, the program below produces the following output:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out"
offsets: i=0; c=4; d=8 a=16
sizeof(struct s)=16
Some older UNIX implementations that support System V
(aka UNIX 98) pseudoterminals don't have this function, but it
is easy to implement:
+.PP
.in +4n
-.nf
-
+.EX
int
posix_openpt(int flags)
{
the position in the argument list of the desired argument, indexed starting
from 1.
Thus,
+.PP
.in +4n
-.nf
-
+.EX
printf("%*d", width, num);
-
-.fi
+.EE
.in
+.PP
and
+.PP
.in +4n
-.nf
-
+.EX
printf("%2$*1$d", width, num);
-
-.fi
+.EE
.in
+.PP
are equivalent.
The second style allows repeated references to the
same argument.
The POSIX locale
uses \(aq.\(aq as radix character, and does not have a grouping character.
Thus,
+.PP
.in +4n
-.nf
-
+.EX
printf("%\(aq.2f", 1234567.89);
-
-.fi
+.EE
.in
+.PP
results in "1234567.89" in the POSIX locale, in "1234567,89" in the
nl_NL locale, and in "1.234.567,89" in the da_DK locale.
.SS Flag characters
To print
.I Pi
to five decimal places:
+.PP
.in +4n
-.nf
-
+.EX
#include <math.h>
#include <stdio.h>
fprintf(stdout, "pi = %.5f\en", 4 * atan(1.0));
and
.I month
are pointers to strings:
+.PP
.in +4n
-.nf
-
+.EX
#include <stdio.h>
fprintf(stdout, "%s, %s %d, %.2d:%.2d\en",
weekday, month, day, hour, min);
Many countries use the day-month-year order.
Hence, an internationalized version must be able to print
the arguments in an order specified by the format:
+.PP
.in +4n
-.nf
-
+.EX
#include <stdio.h>
fprintf(stdout, format,
weekday, month, day, hour, min);
-
-.fi
+.EE
.in
+.PP
where
.I format
depends on locale, and may permute the arguments.
With the value:
+.PP
.in +4n
-.nf
-
+.EX
"%1$s, %3$d. %2$s, %4$d:%5$.2d\en"
-
-.fi
+.EE
.in
+.PP
one might obtain "Sonntag, 3. Juli, 10:02".
.PP
To allocate a sufficiently large string and print into it
.BR pthread_getname_np ().
.PP
The following shell session shows a sample run of the program:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out"
Created a thread. Default name is: a.out
The thread name after setting it is THREADFOO.
take their scheduling attributes from the thread attributes object.
The program then creates a thread using the thread attributes object,
and that thread displays its scheduling policy and priority.
+.PP
.in +4n
-.nf
-
+.EX
$ \fBsu\fP # Need privilege to set real-time scheduling policies
Password:
# \fB./a.out \-mf10 \-ar20 \-i e\fP
The procedure
.I dispatch
has the following form:
+.IP
.in +4n
-.nf
-
+.EX
dispatch(struct svc_req *request, SVCXPRT *xprt);
.fi
.in
The caller must
.BR free (3)
the returned string, as in the following example:
+.PP
.in +4n
-.nf
-
+.EX
char *p;
int n;
is glibc-specific.
.SH NOTES
The call
+.PP
.in +4n
-.nf
-
+.EX
cpu = sched_getcpu();
-
-.fi
+.EE
.in
+.PP
is equivalent to the following
.BR getcpu (2)
call:
+.PP
.in +4n
-.nf
-
+.EX
int c, s;
s = getcpu(&c, NULL, NULL);
cpu = (s == \-1) ? s : c;
in seconds and nanoseconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
This structure is defined as follows:
.PP
-.nf
.in +4n
+.nf
struct timespec {
time_t tv_sec; /* Seconds */
long tv_nsec; /* Nanoseconds [0 .. 999999999] */
};
-.in
.fi
+.in
.PP
If the timeout has already expired by the time of the call,
and the semaphore could not be locked immediately,
.I "struct aliasent"
is defined in
.IR <aliases.h> :
+.PP
.in +4n
-.nf
-
+.EX
struct aliasent {
char *alias_name; /* alias name */
size_t alias_members_len;
.SH CONFORMING TO
These routines are glibc-specific.
The NeXT system has similar routines:
+.PP
.in +4n
-.nf
-
+.EX
#include <aliasdb.h>
void alias_setent(void);
wrapper, this argument,
.IR uinfo ,
is initialized as follows:
+.PP
.in +4n
-.nf
-
+.EX
uinfo.si_signo = sig; /* Argument supplied to sigqueue() */
uinfo.si_code = SI_QUEUE;
uinfo.si_pid = getpid(); /* Process ID of sender */
The
.I sigvec
structure has the following form:
+.PP
.in +4n
-.nf
-
+.EX
struct sigvec {
void (*sv_handler)(int); /* Signal disposition */
int sv_mask; /* Signals to be blocked in handler */
int sv_flags; /* Flags */
};
-
-.fi
+.EE
.in
+.PP
The
.I sv_handler
field specifies the disposition of the signal, and is either:
be a pointer to the stack frame of the variadic function.
In such a setup (by far the most common) there seems
nothing against an assignment
+.PP
.in +4n
-.nf
-
+.EX
va_list aq = ap;
-
-.fi
+.EE
.in
+.PP
Unfortunately, there are also systems that make it an
array of pointers (of length 1), and there one needs
+.PP
.in +4n
-.nf
-
+.EX
va_list aq;
*aq = *ap;
-
-.fi
+.EE
.in
+.PP
Finally, on systems where arguments are passed in registers,
it may be necessary for
.BR va_start ()
To accommodate this situation, C99 adds a macro
.BR va_copy (),
so that the above assignment can be replaced by
+.PP
.in +4n
-.nf
-
+.EX
va_list aq;
va_copy(aq, ap);
\&...
va_end(aq);
-
-.fi
+.EE
.in
+.PP
Each invocation of
.BR va_copy ()
must be matched by a corresponding invocation of
.IR <varargs.h> .
.PP
The historic setup is:
+.PP
.in +4n
-.nf
-
+.EX
#include <varargs.h>
void
}
va_end(ap);
}
-
-.fi
+.EE
.in
+.PP
On some systems,
.I va_end
contains a closing \(aq}\(aq matching a \(aq{\(aq in
A simple implementation of
.BR strncat ()
might be:
+.PP
.in +4n
-.nf
-
+.EX
char *
strncat(char *dest, const char *src, size_t n)
{
A simple implementation of
.BR strncpy ()
might be:
+.PP
.in +4n
-.nf
-
+.EX
char *
strncpy(char *dest, const char *src, size_t n)
{
has length
.IR buflen ,
you can force termination using something like the following:
+.PP
.in +4n
-.nf
-
+.EX
strncpy(buf, str, buflen \- 1);
if (buflen > 0)
buf[buflen \- 1]= \(aq\\0\(aq;
problem.
A relatively clean one is to add an
intermediate function
+.PP
.in +4n
-.nf
-
+.EX
size_t
my_strftime(char *s, size_t max, const char *fmt,
const struct tm *tm)
Some examples of the result string produced by the glibc implementation of
.BR strftime ()
are as follows:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out \(aq%m\(aq"
Result string is "11"
.RB "$" " ./a.out \(aq%5m\(aq"
.EX
.BI "quad_t strtoq(const char *" nptr ", char **" endptr ", int " base );
.EX
-.in
.fi
+.in
with completely analogous definition.
Depending on the wordsize of the current architecture, this
may be equivalent to
has a simpler interface than
.BR strtol ().)
Some examples of the results produced by this program are the following:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " ./a.out 123"
strtol() returned 123
.RB "$" " ./a.out \(aq 123\(aq"
.IR "unsigned char" ,
as in the following example:
.PP
-.nf
.in +4n
+.nf
char c;
\&...
res = toupper((unsigned char) c);
-.in
.fi
+.in
.PP
This is necessary because
.I char
also provides the declaration with the following
feature test macro definitions:
.PP
-.nf
.in +4n
+.nf
(_XOPEN_SOURCE >= 500 ||
(_XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED))
&& ! (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600)
-.in
.fi
+.in
.PP
Minix also has
.IR fttyslot ( fd ).
are valid for the indicated resource and then send a reply with the
following format:
.IP
+.IP
.in +4n
-.nf
-
+.EX
struct fuse_open_out {
uint64_t fh;
uint32_t open_flags;
uint32_t padding;
};
-
-.fi
+.EE
.in
.IP
+.IP
The
.I fh
field is an opaque identifier that the kernel will use to refer
.RE
.TP
.BR FUSE_READ " and " FUSE_READDIR
+.IP
.in +4n
-.nf
-
+.EX
struct fuse_read_in {
uint64_t fh;
uint64_t offset;
uint32_t flags;
uint32_t padding;
};
-
-.fi
+.EE
.in
.IP
+.IP
The requested action is to read up to
.I size
bytes of the file or directory, starting at
.I <linux/loop.h>
as:
.IP
-.nf
.in +4n
+.nf
struct loop_info {
int lo_number; /* ioctl r/o */
dev_t lo_device; /* ioctl r/o */
unsigned long lo_init[2];
char reserved[4];
};
-.in
.fi
+.in
.IP
The encryption type
.RI ( lo_encrypt_type )
structure,
which has some additional fields and a larger range for some other fields:
.IP
-.nf
.in +4n
+.nf
struct loop_info64 {
uint64_t lo_device; /* ioctl r/o */
uint64_t lo_inode; /* ioctl r/o */
uint8_t lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
uint64_t lo_init[2];
};
-.in
.fi
+.in
.SS /dev/loop-control
Since Linux 3.1,
.\" commit 770fe30a46a12b6fb6b63fbe1737654d28e84844
and then associates the loop device with the backing store.
The following shell session demonstrates the use of the program:
.PP
-.nf
.in +4n
+.nf
$ \fBdd if=/dev/zero of=file.img bs=1MiB count=10\fP
10+0 records in
10+0 records out
10485760 bytes (10 MB) copied, 0.00609385 s, 1.7 GB/s
$ \fBsudo ./mnt_loop file.img\fP
loopname = /dev/loop5
-.in
.fi
+.in
.SS Program source
\&
.EX
with two minor numbers for different options.)
.PP
Devices are typically created by:
+.PP
.in +4n
-.nf
-
+.EX
mknod \-m 666 /dev/st0 c 9 0
mknod \-m 666 /dev/st0l c 9 32
mknod \-m 666 /dev/st0m c 9 64
commands (e.g., rewind).
.PP
An example:
+.PP
.in +4n
-.nf
-
+.EX
struct mtop mt_cmd;
mt_cmd.mt_op = MTSETDRVBUFFER;
mt_cmd.mt_count = MT_ST_BOOLEANS |
.I uint32_t
or
.IR uint64_t ):
+.PP
.in +4n
-.nf
-
+.EX
ElfN_Addr Unsigned program address, uintN_t
ElfN_Off Unsigned file offset, uintN_t
ElfN_Section Unsigned section index, uint16_t
.I Elf32_Ehdr
or
.IR Elf64_Ehdr :
+.PP
.in +4n
-.nf
-
+.EX
#define EI_NIDENT 16
typedef struct {
or
.I Elf64_Phdr
depending on the architecture:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
uint32_t p_type;
Elf32_Off p_offset;
} Elf32_Phdr;
.fi
.in
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
uint32_t p_type;
uint32_t p_flags;
reserved indices.
.PP
The section header has the following structure:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
uint32_t sh_name;
uint32_t sh_type;
} Elf32_Shdr;
.fi
.in
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
uint32_t sh_name;
uint32_t sh_type;
relocate a program's symbolic definitions and references.
A symbol table
index is a subscript into this array.
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
uint32_t st_name;
Elf32_Addr st_value;
} Elf32_Sym;
.fi
.in
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
uint32_t st_name;
unsigned char st_info;
Relocation entries are these data.
.PP
Relocation structures that do not need an addend:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf32_Addr r_offset;
uint32_t r_info;
} Elf32_Rel;
.fi
.in
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf64_Addr r_offset;
uint64_t r_info;
.in
.PP
Relocation structures that need an addend:
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf32_Addr r_offset;
uint32_t r_info;
} Elf32_Rela;
.fi
.in
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf64_Addr r_offset;
uint64_t r_info;
member controls the interpretation
of
.IR d_un .
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf32_Sword d_tag;
union {
extern Elf32_Dyn _DYNAMIC[];
.fi
.in
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf64_Sxword d_tag;
union {
one for core files and one for all other ELF types.
If the namespace is unknown, then tools will usually fall back to these sets
of notes as well.
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf32_Word n_namesz;
Elf32_Word n_descsz;
} Elf32_Nhdr;
.fi
.in
+.PP
.in +4n
-.nf
-
+.EX
typedef struct {
Elf64_Word n_namesz;
Elf64_Word n_descsz;
hierarchy, and put the shell into that cgroup:
.PP
.EX
-.nf
.in +4n
+.nf
# \fBmkdir \-p /sys/fs/cgroup/freezer/sub\fP
# \fBecho $$\fP # Show PID of this shell
30655
# \fBsh \-c \(aqecho 30655 > /sys/fs/cgroup/freezer/sub/cgroup.procs\(aq\fP
# \fBcat /proc/self/cgroup | grep freezer\fP
7:freezer:/sub
-.in
.fi
+.in
.EE
.PP
Next, we use
.I cpu
controller as follows:
.PP
-.nf
.in +4n
+.nf
mount \-t cgroup \-o cpu none /sys/fs/cgroup/cpu
-.in
.fi
+.in
.PP
It is possible to comount multiple controllers against the same hierarchy.
For example, here the
.IR cpuacct
controllers are comounted against a single hierarchy:
.PP
-.nf
.in +4n
+.nf
mount \-t cgroup \-o cpu,cpuacct none /sys/fs/cgroup/cpu,cpuacct
-.in
.fi
+.in
.PP
Comounting controllers has the effect that a process is in the same cgroup for
all of the comounted controllers.
.PP
It is possible to comount all v1 controllers against the same hierarchy:
.PP
-.nf
.in +4n
+.nf
mount \-t cgroup \-o all cgroup /sys/fs/cgroup
-.in
.fi
+.in
.PP
(One can achieve the same result by omitting
.IR "\-o all" ,
An example of the contents of this file (reformatted for readability)
is the following:
.IP
-.nf
.in +4n
+.nf
#subsys_name hierarchy num_cgroups enabled
cpuset 4 1 1
cpu 8 1 1
net_prio 9 1 1
hugetlb 0 1 0
pids 2 1 1
-.in
.fi
+.in
.IP
The fields in this file are, from left to right:
.RS
and what feature test macros are explicitly set.
The following shell session, on a system with glibc 2.10,
shows some examples of what we would see:
+.PP
.in +4n
-.nf
-
+.EX
$ \fBcc ftm.c\fP
$ \fB./a.out\fP
_POSIX_SOURCE defined
.PP
Thus, to test for a regular file (for example), one could write:
.PP
-.nf
.in +4n
+.nf
stat(pathname, &sb);
if ((sb.st_mode & S_IFMT) == S_IFREG) {
/* Handle regular file */
}
-.in
.fi
+.in
.PP
Because tests of the above form are common, additional
macros are defined by POSIX to allow the test of the file type in
.PP
The preceding code snippet could thus be rewritten as:
.PP
-.nf
.in +4n
+.nf
stat(pathname, &sb);
if (S_ISREG(sb.st_mode)) {
/* Handle regular file */
}
-.in
.fi
+.in
.PP
The definitions of most of the above file type test macros
are provided if any of the following feature test macros is defined:
Each successful
.BR read (2)
returns a buffer containing one or more of the following structures:
+.PP
.in +4n
-.nf
-
+.EX
struct inotify_event {
int wd; /* Watch descriptor */
.\" FIXME . The type of the 'wd' field should probably be "int32_t".
32100fab I--Q--- 4 perm 1f3f0000 1000 65534 keyring _uid.1000: 2
32a387ea I--Q--- 1 perm 3f010000 1000 1000 keyring _pid: 2
3ce56aea I--Q--- 5 perm 3f030000 1000 1000 keyring _ses: 1
-.in
.fi
+.in
.IP
The fields shown in each line of this file are as follows:
.RS
has at least one key on the system.
An example of the data that one might see in this file is the following:
.IP
-.nf
.in +4n
+.nf
0: 10 9/9 2/1000000 22/25000000
42: 9 9/9 8/200 106/20000
1000: 11 11/11 10/200 271/20000
-.in
.fi
+.in
.IP
The fields shown in each line are as follows:
.RS
.\" AUTHORS sections are discouraged
.\" AUTHORS [Discouraged]
\fBSEE ALSO\fP
-
-.fi
+.EE
.in
+.PP
.IR "Where a traditional heading would apply" ", " "please use it" ;
this kind of consistency can make the information easier to understand.
If you must, you can create your own
Complete commands should, if long,
be written as an indented line on their own,
with a blank line before and after the command, for example
+.PP
.in +4n
-.nf
-
+.EX
man 7 man-pages
-
-.fi
+.EE
.in
+.PP
If the command is short, then it can be included inline in the text,
in italic format, for example,
.IR "man 7 man-pages" .
function should set
.I errno
to zero, and make the following call
+.PP
.in +4n
-.nf
-
+.EX
feclearexcept(FE_ALL_EXCEPT);
-
-.fi
+.EE
.in
+.PP
before calling a mathematical function.
.PP
Upon return from the mathematical function, if
is nonzero, or the following call (see
.BR fenv (3))
returns nonzero
+.PP
.in +4n
-.nf
-
+.EX
fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW |
FE_UNDERFLOW);
-
-.fi
+.EE
.in
+.PP
.\" enum
.\" {
.\" FE_INVALID = 0x01,
.BR log (3)'s
argument is not a NaN and is not zero (a pole error) or
less than zero (a domain error):
+.PP
.in +4n
-.nf
-
+.EX
double x, r;
if (isnan(x) || islessequal(x, 0)) {
}
r = log(x);
-
-.fi
+.EE
.in
+.PP
The discussion on this page does not apply to the complex
mathematical functions (i.e., those declared by
.IR <complex.h> ),
and then view the mounts in
.IR /proc/self/mountinfo :
.PP
-.nf
.in +4n
+.nf
sh1# \fBmount \-\-make\-shared /mntS\fP
sh1# \fBmount \-\-make\-private /mntP\fP
sh1# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
77 61 8:17 / /mntS rw,relatime shared:1
83 61 8:15 / /mntP rw,relatime
-.in
.fi
+.in
.PP
From the
.IR /proc/self/mountinfo
.IR / ,
which is mounted as private:
.PP
-.nf
.in +4n
+.nf
sh1# \fBcat /proc/self/mountinfo | awk \(aq$1 == 61\(aq | sed \(aqs/ \- .*//\(aq\fP
61 0 8:2 / / rw,relatime
-.in
.fi
+.in
.PP
On a second terminal,
we create a new mount namespace where we run a second shell
and inspect the mounts:
.PP
-.nf
.in +4n
+.nf
$ \fBPS1=\(aqsh2# \(aq sudo unshare \-m \-\-propagation unchanged sh\fP
sh2# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
222 145 8:17 / /mntS rw,relatime shared:1
225 145 8:15 / /mntP rw,relatime
-.in
.fi
+.in
.PP
The new mount namespace received a copy of the initial mount namespace's
mount points.
.IR /mntP
and inspect the set-up:
.PP
-.nf
.in +4n
+.nf
sh2# \fBmkdir /mntS/a\fP
sh2# \fBmount /dev/sdb6 /mntS/a\fP
sh2# \fBmkdir /mntP/b\fP
225 145 8:15 / /mntP rw,relatime
178 222 8:22 / /mntS/a rw,relatime shared:2
230 225 8:23 / /mntP/b rw,relatime
-.in
.fi
+.in
.PP
From the above, it can be seen that
.IR /mntS/a
.IR /mntP
did not propagate:
.PP
-.nf
.in +4n
+.nf
sh1# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
77 61 8:17 / /mntS rw,relatime shared:1
83 61 8:15 / /mntP rw,relatime
179 77 8:22 / /mntS/a rw,relatime shared:2
-.in
.fi
+.in
.\"
.SS MS_SLAVE example
Making a mount point a slave allows it to receive propagated
We can demonstrate the effect of slaving by first marking
two mount points as shared in the initial mount namespace:
.PP
-.nf
.in +4n
+.nf
sh1# \fBmount \-\-make\-shared /mntX\fP
sh1# \fBmount \-\-make\-shared /mntY\fP
sh1# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
132 83 8:23 / /mntX rw,relatime shared:1
133 83 8:22 / /mntY rw,relatime shared:2
-.in
.fi
+.in
.PP
On a second terminal,
we create a new mount namespace and inspect the mount points:
.PP
-.nf
.in +4n
+.nf
sh2# \fBunshare \-m \-\-propagation unchanged sh\fP
sh2# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
168 167 8:23 / /mntX rw,relatime shared:1
169 167 8:22 / /mntY rw,relatime shared:2
-.in
.fi
+.in
.PP
In the new mount namespace, we then mark one of the mount points as a slave:
.PP
-.nf
.in +4n
+.nf
sh2# \fBmount \-\-make\-slave /mntY\fP
sh2# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
168 167 8:23 / /mntX rw,relatime shared:1
169 167 8:22 / /mntY rw,relatime master:2
-.in
.fi
+.in
.PP
From the above output, we see that
.IR /mntY
and
.IR /mntY :
.PP
-.nf
.in +4n
+.nf
sh2# \fBmkdir /mntX/a\fP
sh2# \fBmount /dev/sda3 /mntX/a\fP
sh2# \fBmkdir /mntY/b\fP
sh2# \fBmount /dev/sda5 /mntY/b\fP
-.in
.fi
+.in
.PP
When we inspect the state of the mount points in the new mount namespace,
we see that
.IR /mntY/b
was created as a private mount:
.PP
-.nf
.in +4n
+.nf
sh2# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
168 167 8:23 / /mntX rw,relatime shared:1
169 167 8:22 / /mntY rw,relatime master:2
173 168 8:3 / /mntX/a rw,relatime shared:3
175 169 8:5 / /mntY/b rw,relatime
-.in
.fi
+.in
.PP
Returning to the first terminal (in the initial mount namespace),
we see that the mount
.IR /mntY/b
was not propagated:
.PP
-.nf
.in +4n
+.nf
sh1# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
132 83 8:23 / /mntX rw,relatime shared:1
133 83 8:22 / /mntY rw,relatime shared:2
174 132 8:3 / /mntX/a rw,relatime shared:3
-.in
.fi
+.in
.PP
Now we create a new mount point under
.IR /mntY
in the first shell:
.PP
-.nf
.in +4n
+.nf
sh1# \fBmkdir /mntY/c\fP
sh1# \fBmount /dev/sda1 /mntY/c\fP
sh1# \fBcat /proc/self/mountinfo | grep '/mnt' | sed 's/ \- .*//'\fP
133 83 8:22 / /mntY rw,relatime shared:2
174 132 8:3 / /mntX/a rw,relatime shared:3
178 133 8:1 / /mntY/c rw,relatime shared:4
-.in
.fi
+.in
.PP
When we examine the mount points in the second mount namespace,
we see that in this case the new mount has been propagated
to the slave mount point,
and that the new mount is itself a slave mount (to peer group 4):
.PP
-.nf
.in +4n
+.nf
sh2# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
168 167 8:23 / /mntX rw,relatime shared:1
169 167 8:22 / /mntY rw,relatime master:2
173 168 8:3 / /mntX/a rw,relatime shared:3
175 169 8:5 / /mntY/b rw,relatime
179 169 8:1 / /mntY/c rw,relatime master:4
-.in
.fi
+.in
.\"
.SS MS_UNBINDABLE example
One of the primary purposes of unbindable mounts is to avoid
.PP
Suppose we have a system with the following mount points:
.PP
-.nf
.in +4n
+.nf
# \fBmount | awk \(aq{print $1, $2, $3}\(aq\fP
/dev/sda1 on /
/dev/sdb6 on /mntX
/dev/sdb7 on /mntY
-.in
.fi
+.in
.PP
Suppose furthermore that we wish to recursively bind mount
the root directory under several users' home directories.
We do this for the first user, and inspect the mount points:
.PP
-.nf
.in +4n
+.nf
# \fBmount \-\-rbind / /home/cecilia/\fP
# \fBmount | awk \(aq{print $1, $2, $3}\(aq\fP
/dev/sda1 on /
/dev/sda1 on /home/cecilia
/dev/sdb6 on /home/cecilia/mntX
/dev/sdb7 on /home/cecilia/mntY
-.in
.fi
+.in
.PP
When we repeat this operation for the second user,
we start to see the explosion problem:
.PP
-.nf
.in +4n
+.nf
# \fBmount \-\-rbind / /home/henry\fP
# \fBmount | awk \(aq{print $1, $2, $3}\(aq\fP
/dev/sda1 on /
/dev/sda1 on /home/henry/home/cecilia
/dev/sdb6 on /home/henry/home/cecilia/mntX
/dev/sdb7 on /home/henry/home/cecilia/mntY
-.in
.fi
+.in
.PP
Under
.IR /home/henry ,
Upon repeating the step for a third user,
it becomes obvious that the explosion is exponential in nature:
.PP
-.nf
.in +4n
+.nf
# \fBmount \-\-rbind / /home/otto\fP
# \fBmount | awk \(aq{print $1, $2, $3}\(aq\fP
/dev/sda1 on /
/dev/sda1 on /home/otto/home/henry/home/cecilia
/dev/sdb6 on /home/otto/home/henry/home/cecilia/mntX
/dev/sdb7 on /home/otto/home/henry/home/cecilia/mntY
-.in
.fi
+.in
.PP
The mount explosion problem in the above scenario can be avoided
by making each of the new mounts unbindable.
directory will not replicate the unbindable mounts.
We make such a mount for the first user:
.PP
-.nf
.in +4n
+.nf
# \fBmount \-\-rbind \-\-make\-unbindable / /home/cecilia\fP
-.in
.fi
+.in
.PP
Before going further, we show that unbindable mounts are indeed unbindable:
.PP
-.nf
.in +4n
+.nf
# \fBmkdir /mntZ\fP
# \fBmount \-\-bind /home/cecilia /mntZ\fP
mount: wrong fs type, bad option, bad superblock on /home/cecilia,
In some cases useful info is found in syslog \- try
dmesg | tail or so.
-.in
.fi
+.in
.PP
Now we create unbindable recursive bind mounts for the other two users:
.PP
-.nf
.in +4n
+.nf
# \fBmount \-\-rbind \-\-make\-unbindable / /home/henry\fP
# \fBmount \-\-rbind \-\-make\-unbindable / /home/otto\fP
-.in
.fi
+.in
.PP
Upon examining the list of mount points,
we see there has been no explosion of mount points,
because the unbindable mounts were not replicated
under each user's directory:
.PP
-.nf
.in +4n
+.nf
# \fBmount | awk \(aq{print $1, $2, $3}\(aq\fP
/dev/sda1 on /
/dev/sdb6 on /mntX
/dev/sda1 on /home/otto
/dev/sdb6 on /home/otto/mntX
/dev/sdb7 on /home/otto/mntY
-.in
.fi
+.in
.\"
.SS Propagation type transitions
The following table shows the effect that applying a new propagation type
filesystem remains visible at the correct location
in the chroot-ed environment.
.PP
-.nf
.in +4n
+.nf
# \fBmkdir \-p /mnt/proc\fP
# \fBmount \-\-bind / /mnt\fP
# \fBmount \-\-bind /proc /mnt/proc\fP
-.in
.fi
+.in
.PP
Next, we ensure that the
.IR /mnt
mount is a shared mount in a new peer group (with no peers):
.PP
-.nf
.in +4n
+.nf
# \fBmount \-\-make\-private /mnt\fP # Isolate from any previous peer group
# \fBmount \-\-make\-shared /mnt\fP
# \fBcat /proc/self/mountinfo | grep \(aq/mnt\(aq | sed \(aqs/ \- .*//\(aq\fP
239 61 8:2 / /mnt ... shared:102
248 239 0:4 / /mnt/proc ... shared:5
-.in
.fi
+.in
.PP
Next, we bind mount
.IR /mnt/etc
onto
.IR /tmp/etc :
.PP
-.nf
.in +4n
+.nf
# \fBmkdir \-p /tmp/etc\fP
# \fBmount \-\-bind /mnt/etc /tmp/etc\fP
# \fBcat /proc/self/mountinfo | egrep \(aq/mnt|/tmp/\(aq | sed \(aqs/ \- .*//\(aq\fP
239 61 8:2 / /mnt ... shared:102
248 239 0:4 / /mnt/proc ... shared:5
267 40 8:2 /etc /tmp/etc ... shared:102
-.in
.fi
+.in
.PP
Initially, these two mount points are in the same peer group,
but we then make the
shared as well,
so that it can propagate events to the next slave in the chain:
.PP
-.nf
.in +4n
+.nf
# \fBmount \-\-make\-slave /tmp/etc\fP
# \fBmount \-\-make\-shared /tmp/etc\fP
# \fBcat /proc/self/mountinfo | egrep \(aq/mnt|/tmp/\(aq | sed \(aqs/ \- .*//\(aq\fP
239 61 8:2 / /mnt ... shared:102
248 239 0:4 / /mnt/proc ... shared:5
267 40 8:2 /etc /tmp/etc ... shared:105 master:102
-.in
.fi
+.in
.PP
Then we bind mount
.IR /tmp/etc
a slave of
.IR /tmp/etc :
.PP
-.nf
.in +4n
+.nf
# \fBmkdir \-p /mnt/tmp/etc\fP
# \fBmount \-\-bind /tmp/etc /mnt/tmp/etc\fP
# \fBmount \-\-make\-slave /mnt/tmp/etc\fP
248 239 0:4 / /mnt/proc ... shared:5
267 40 8:2 /etc /tmp/etc ... shared:105 master:102
273 239 8:2 /etc /mnt/tmp/etc ... master:105
-
+.fi
.in
-.fi
+.PP
From the above, we see that
.IR /mnt
is the master of the slave
directory, which renders the mount with ID 267 unreachable
from the (new) root directory:
.PP
-.nf
.in +4n
+.nf
# \fBchroot /mnt\fP
-.in
.fi
+.in
.PP
When we examine the state of the mounts inside the chroot-ed environment,
we see the following:
.PP
-.nf
.in +4n
+.nf
# \fBcat /proc/self/mountinfo | sed \(aqs/ \- .*//\(aq\fP
239 61 8:2 / / ... shared:102
248 239 0:4 / /proc ... shared:5
273 239 8:2 /etc /tmp/etc ... master:105 propagate_from:102
-.in
.fi
+.in
.PP
Above, we see that the mount with ID 273
is a slave whose master is the peer group 105.
but the details are likely to differ.)
This filesystem can be mounted (by the superuser) using the following
commands:
+.PP
.in +4n
-.nf
-
+.EX
.RB "#" " mkdir /dev/mqueue"
.RB "#" " mount \-t mqueue none /dev/mqueue"
-
-.fi
+.EE
.in
+.PP
The sticky bit is automatically enabled on the mount directory.
.PP
After the filesystem has been mounted, the message queues on the system
.PP
The contents of each file in the directory consist of a single line
containing information about the queue:
+.PP
.in +4n
-.nf
-
+.EX
.RB "$" " cat /dev/mqueue/mymq"
QSIZE:129 NOTIFY:2 SIGNO:0 NOTIFY_PID:8260
-
-.fi
+.EE
.in
+.PP
These fields are as follows:
.TP
.B QSIZE
POSIX.1-2001 and POSIX.1-2008 require that all functions specified
in the standard shall be thread-safe,
except for the following functions:
+.PP
.in +4n
-.nf
-
+.EX
asctime()
basename()
catgets()
.PP
Only the following functions are required to be async-cancel-safe by
POSIX.1-2001 and POSIX.1-2008:
+.PP
.in +4n
-.nf
-
+.EX
pthread_cancel()
pthread_setcancelstate()
pthread_setcanceltype()
.BR getconf (1)
command can be used to determine
the system's threading implementation, for example:
-.nf
+.PP
.in +4n
-
+.EX
bash$ getconf GNU_LIBPTHREAD_VERSION
NPTL 2.3.4
-.in
-.fi
+.EE
+.in
.PP
With older glibc versions, a command such as the following should
be sufficient to determine the default threading implementation:
-.nf
+.PP
.in +4n
-
+.EX
bash$ $( ldd /bin/ls | grep libc.so | awk \(aq{print $3}\(aq ) | \\
egrep \-i \(aqthreads|nptl\(aq
Native POSIX Threads Library by Ulrich Drepper et al
-.in
-.fi
+.EE
+.in
.SS Selecting the threading implementation: LD_ASSUME_KERNEL
On systems with a glibc that supports both LinuxThreads and NPTL
(i.e., glibc 2.3.\fIx\fP), the
(broken) application that depends on some nonconformant behavior
in LinuxThreads.)
For example:
-.nf
+.PP
.in +4n
-
+.EX
bash$ $( LD_ASSUME_KERNEL=2.2.5 ldd /bin/ls | grep libc.so | \\
awk \(aq{print $3}\(aq ) | egrep \-i \(aqthreads|nptl\(aq
linuxthreads-0.10 by Xavier Leroy
-.in
-.fi
+.EE
+.in
.SH SEE ALSO
.ad l
.nh
A process's autogroup (task group) membership can be viewed via the file
.IR /proc/[pid]/autogroup :
.PP
-.nf
.in +4n
+.nf
$ \fBcat /proc/1/autogroup\fP
/autogroup-1 nice 0
-.in
.fi
+.in
.PP
This file can also be used to modify the CPU bandwidth allocated
to an autogroup.
.I all
of the processes in a terminal session:
.PP
-.nf
.in +4n
+.nf
$ \fBecho 10 > /proc/self/autogroup\fP
-.in
.fi
+.in
.SS Real-time features in the mainline Linux kernel
.\" FIXME . Probably this text will need some minor tweaking
.\" ask Carsten Emde about this.
mainline kernel,
they must be installed to achieve the best real-time performance.
These patches are named:
+.PP
.in +4n
-.nf
-
+.EX
patch-\fIkernelversion\fP-rt\fIpatchversion\fP
-.fi
+.EE
.in
.BR keyctl (1)
utility as:
.PP
-.nf
.in +4n
+.nf
keyctl session
keyctl session - [<prog> <arg1> <arg2> ...]
keyctl session <name> [<prog> <arg1> <arg2> ...]
-.in
.fi
+.in
.PP
and:
.PP
-.nf
.in +4n
+.nf
keyctl new_session
-.in
.fi
+.in
.SH SEE ALSO
.ad l
.nh
.SH BUGS
.\" FIXME . remove this section once glibc supports UDP-Lite
Where glibc support is missing, the following definitions are needed:
+.PP
.in +4n
-.nf
-
+.EX
#define IPPROTO_UDPLITE 136
.\" The following two are defined in the kernel in linux/net/udplite.h
#define UDPLITE_SEND_CSCOV 10
to other processes using ancillary data.
.SS Address format
A UNIX domain socket address is represented in the following structure:
+.PP
.in +4n
-.nf
-
+.EX
.\" #define UNIX_PATH_MAX 108
.\"
struct sockaddr_un {
/* handle error */ ;
printf("sun_path = %s\\n", ((struct sockaddr_un *) addrp)\->sun_path);
-.in
.fi
+.in
.PP
This sort of messiness can be avoided if it is guaranteed
that the applications that
.BR proc (5)
.PP
The documents, examples, and source code in the Linux source code tree:
+.PP
.in +4n
-.nf
-
+.EX
Documentation/ABI/stable/vdso
Documentation/ia64/fsys.txt
Documentation/vDSO/* (includes examples of using the vDSO)