git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_scrub: integrate services with systemd
author Darrick J. Wong <darrick.wong@oracle.com>
Fri, 2 Feb 2018 15:32:46 +0000 (09:32 -0600)
committer Eric Sandeen <sandeen@redhat.com>
Fri, 2 Feb 2018 15:32:46 +0000 (09:32 -0600)
Create a systemd service unit so that we can run the online scrubber
under systemd with (somewhat) appropriate containment.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
15 files changed:
.gitignore
configure.ac
debian/postinst
include/builddefs.in
m4/Makefile
m4/package_services.m4 [new file with mode: 0644]
scrub/Makefile
scrub/xfs_scrub.c
scrub/xfs_scrub@.service.in [new file with mode: 0644]
scrub/xfs_scrub_all.cron.in [new file with mode: 0644]
scrub/xfs_scrub_all.in
scrub/xfs_scrub_all.service.in [new file with mode: 0644]
scrub/xfs_scrub_all.timer [new file with mode: 0644]
scrub/xfs_scrub_fail [new file with mode: 0755]
scrub/xfs_scrub_fail@.service.in [new file with mode: 0644]

index a3db640608e9ecd964ac0f89e0ffba7e5803e7ff..d887451375ff638a1ab81bfe27a8aebc70aa57e4 100644 (file)
@@ -69,6 +69,10 @@ cscope.*
 /rtcp/xfs_rtcp
 /spaceman/xfs_spaceman
 /scrub/xfs_scrub
+/scrub/xfs_scrub@.service
+/scrub/xfs_scrub_all
+/scrub/xfs_scrub_all.service
+/scrub/xfs_scrub_fail@.service
 
 # generated crc files
 /libxfs/crc32selftest
index bb032e5c0aca658ed76bff6cbb8b21e2d2b8f1c8..b438165d1f72f5fa1dff6128b906d4411c0b0217 100644 (file)
@@ -174,6 +174,8 @@ AC_HAVE_OPENAT
 AC_HAVE_FSTATAT
 AC_HAVE_SG_IO
 AC_HAVE_HDIO_GETGEO
+AC_CONFIG_SYSTEMD_SYSTEM_UNIT_DIR
+AC_CONFIG_CROND_DIR
 
 if test "$enable_blkid" = yes; then
 AC_HAVE_BLKID_TOPO
index d11c8d94a3cbe451a0c2978f6e51188a31baaded..11693a6eb0046092e05556e8151b55e3a2b9125a 100644 (file)
@@ -8,6 +8,9 @@ case "${1}" in
                then
                        update-initramfs -u
                fi
+               if [ -x /bin/systemctl ]; then
+                       /bin/systemctl daemon-reload
+               fi
                ;;
 
        abort-upgrade|abort-remove|abort-deconfigure)
index d44faf91cf257b8dc2f3f6444d451358032ee196..df76b2c1610d7e0a203440d7ba7091364516a472 100644 (file)
@@ -127,6 +127,10 @@ HAVE_OPENAT = @have_openat@
 HAVE_FSTATAT = @have_fstatat@
 HAVE_SG_IO = @have_sg_io@
 HAVE_HDIO_GETGEO = @have_hdio_getgeo@
+HAVE_SYSTEMD = @have_systemd@
+SYSTEMD_SYSTEM_UNIT_DIR = @systemd_system_unit_dir@
+HAVE_CROND = @have_crond@
+CROND_DIR = @crond_dir@
 
 GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
 #         -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-decl
index 61d617e143e87ab9879b083a766530e76ef5598e..a6d11e9704ff03992d8b56801f092059e4569a72 100644 (file)
@@ -21,6 +21,7 @@ LSRCFILES = \
        package_libcdev.m4 \
        package_pthread.m4 \
        package_sanitizer.m4 \
+       package_services.m4 \
        package_types.m4 \
        package_unistring.m4 \
        package_utilies.m4 \
diff --git a/m4/package_services.m4 b/m4/package_services.m4
new file mode 100644 (file)
index 0000000..f2d888a
--- /dev/null
@@ -0,0 +1,77 @@
+#
+# Figure out where to put systemd service units
+#
+# Honors --with-systemd-unit-dir=DIR.  When no directory is given the
+# systemdsystemunitdir variable is requested from pkg-config.
+#
+# Substitutes:
+#   have_systemd            -- "yes" when a unit directory is known, "no"
+#                              when none could be determined, "disabled"
+#                              when the option was given as "no"
+#   systemd_system_unit_dir -- the unit directory; not assigned when the
+#                              option was given as "no"
+#
+AC_DEFUN([AC_CONFIG_SYSTEMD_SYSTEM_UNIT_DIR],
+[
+       AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+       AC_ARG_WITH([systemd_unit_dir],
+         [AS_HELP_STRING([--with-systemd-unit-dir@<:@=DIR@:>@],
+               [Install systemd system units into DIR.])],
+         [],
+         [with_systemd_unit_dir=yes])
+       AS_IF([test "x${with_systemd_unit_dir}" != "xno"],
+         [
+               AS_IF([test "x${with_systemd_unit_dir}" = "xyes"],
+                 [
+                       PKG_CHECK_MODULES([systemd], [systemd],
+                         [
+                               with_systemd_unit_dir="$($PKG_CONFIG --variable=systemdsystemunitdir systemd)"
+                         ], [
+                               with_systemd_unit_dir=""
+                         ])
+                       m4_pattern_allow([^PKG_(MAJOR|MINOR|BUILD|REVISION)$])
+                 ])
+               AC_MSG_CHECKING([for systemd system unit dir])
+               systemd_system_unit_dir="${with_systemd_unit_dir}"
+               AS_IF([test -n "${systemd_system_unit_dir}"],
+                 [
+                       AC_MSG_RESULT(${systemd_system_unit_dir})
+                       have_systemd="yes"
+                 ],
+                 [
+                       AC_MSG_RESULT(no)
+                       have_systemd="no"
+                 ])
+         ],
+         [
+               have_systemd="disabled"
+         ])
+       AC_SUBST(have_systemd)
+       AC_SUBST(systemd_system_unit_dir)
+])
+
+#
+# Figure out where to install crontabs
+#
+# Honors --with-crond-dir=DIR.  When no directory is given, /etc/cron.d is
+# used if it exists on the build host.  If it does not exist the placeholder
+# value "yes" is cleared, so that it is never mistaken for a directory name
+# and cron support is reported as missing.
+#
+# Substitutes:
+#   have_crond -- "yes" when a crontab directory is known, "no" when none
+#                 could be determined, "disabled" when the option was
+#                 given as "no"
+#   crond_dir  -- the crontab directory; empty unless have_crond is "yes"
+#
+AC_DEFUN([AC_CONFIG_CROND_DIR],
+[
+       AC_ARG_WITH([crond_dir],
+         [AS_HELP_STRING([--with-crond-dir@<:@=DIR@:>@],
+               [Install system crontabs into DIR.])],
+         [],
+         [with_crond_dir=yes])
+       AS_IF([test "x${with_crond_dir}" != "xno"],
+         [
+               AS_IF([test "x${with_crond_dir}" = "xyes"],
+                 [
+                       AS_IF([test -d "/etc/cron.d"],
+                         [with_crond_dir="/etc/cron.d"],
+                         [with_crond_dir=""])
+                 ])
+               AC_MSG_CHECKING([for system crontab dir])
+               crond_dir="${with_crond_dir}"
+               AS_IF([test -n "${crond_dir}"],
+                 [
+                       AC_MSG_RESULT(${crond_dir})
+                       have_crond="yes"
+                 ],
+                 [
+                       AC_MSG_RESULT(no)
+                       have_crond="no"
+                 ])
+         ],
+         [
+               have_crond="disabled"
+         ])
+       AC_SUBST(have_crond)
+       AC_SUBST(crond_dir)
+])
index ca6dab0b88ff0628e65f0813bff73ab1ac266a05..063279400262becf1db5acfd1eef62972a625154 100644 (file)
@@ -15,6 +15,19 @@ LTCOMMAND = xfs_scrub
 INSTALL_SCRUB = install-scrub
 XFS_SCRUB_ALL_PROG = xfs_scrub_all
 XFS_SCRUB_ARGS = -b -n
+ifeq ($(HAVE_SYSTEMD),yes)
+INSTALL_SCRUB += install-systemd
+SYSTEMD_SERVICES = xfs_scrub@.service xfs_scrub_all.service xfs_scrub_all.timer xfs_scrub_fail@.service
+OPTIONAL_TARGETS += $(SYSTEMD_SERVICES)
+endif
+ifeq ($(HAVE_CROND),yes)
+INSTALL_SCRUB += install-crond
+CRONTABS = xfs_scrub_all.cron
+OPTIONAL_TARGETS += $(CRONTABS)
+# Don't enable the crontab by default for now
+CROND_DIR = $(PKG_LIB_DIR)/$(PKG_NAME)
+endif
+
 endif  # scrub_prereqs
 
 HFILES = \
@@ -84,7 +97,7 @@ ifeq ($(HAVE_HDIO_GETGEO),yes)
 LCFLAGS += -DHAVE_HDIO_GETGEO
 endif
 
-default: depend $(LTCOMMAND) $(XFS_SCRUB_ALL_PROG)
+default: depend $(LTCOMMAND) $(XFS_SCRUB_ALL_PROG) $(OPTIONAL_TARGETS)
 
 xfs_scrub_all: xfs_scrub_all.in
        @echo "    [SED]    $@"
@@ -98,6 +111,27 @@ include $(BUILDRULES)
 
 install: $(INSTALL_SCRUB)
 
+# Generate a systemd unit from its .in template by filling in the
+# @sbindir@, @scrub_args@, @pkg_lib_dir@ and @pkg_name@ placeholders.
+%.service: %.service.in
+       @echo "    [SED]    $@"
+       $(Q)$(SED) -e "s|@sbindir@|$(PKG_ROOT_SBIN_DIR)|g" \
+                  -e "s|@scrub_args@|$(XFS_SCRUB_ARGS)|g" \
+                  -e "s|@pkg_lib_dir@|$(PKG_LIB_DIR)|g" \
+                  -e "s|@pkg_name@|$(PKG_NAME)|g" < $< > $@
+
+# Generate a crontab from its .in template; only @sbindir@ is substituted.
+%.cron: %.cron.in
+       @echo "    [SED]    $@"
+       $(Q)$(SED) -e "s|@sbindir@|$(PKG_ROOT_SBIN_DIR)|g" < $< > $@
+
+# Install the unit files into the systemd unit directory and the
+# xfs_scrub_fail helper script into $(PKG_LIB_DIR)/$(PKG_NAME).
+install-systemd: default $(SYSTEMD_SERVICES)
+       $(INSTALL) -m 755 -d $(SYSTEMD_SYSTEM_UNIT_DIR)
+       $(INSTALL) -m 644 $(SYSTEMD_SERVICES) $(SYSTEMD_SYSTEM_UNIT_DIR)
+       $(INSTALL) -m 755 -d $(PKG_LIB_DIR)/$(PKG_NAME)
+       $(INSTALL) -m 755 xfs_scrub_fail $(PKG_LIB_DIR)/$(PKG_NAME)
+
+# Install the generated crontabs into $(CROND_DIR).
+install-crond: default $(CRONTABS)
+       $(INSTALL) -m 755 -d $(CROND_DIR)
+       $(INSTALL) -m 644 $(CRONTABS) $(CROND_DIR)
+
 install-scrub: default
        $(INSTALL) -m 755 -d $(PKG_ROOT_SBIN_DIR)
        $(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_ROOT_SBIN_DIR)
index 47e13810184a1947bfcfb76be5a428dd11682e4f..5ab557de780ebb5cad3af16b1e50e2b900a38409 100644 (file)
  * XFS_SCRUB_NO_SCSI_VERIFY    -- disable SCSI VERIFY (if present)
  * XFS_SCRUB_PHASE             -- run only this scrub phase
  * XFS_SCRUB_THREADS           -- start exactly this number of threads
+ *
+ * Available even in non-debug mode:
+ * SERVICE_MODE                        -- compress all error codes to 1 for LSB
+ *                                service action compliance
  */
 
 /* Program name; needed for libfrog error reports. */
@@ -154,6 +158,12 @@ bool                               want_fstrim = true;
 bool                           stderr_isatty;
 bool                           stdout_isatty;
 
+/*
+ * If we are running as a service, we need to be careful about what
+ * error codes we return to the calling process.
+ */
+static bool                    is_service;
+
 #define SCRUB_RET_SUCCESS      (0)     /* no problems left behind */
 #define SCRUB_RET_CORRUPT      (1)     /* corruption remains on fs */
 #define SCRUB_RET_UNOPTIMIZED  (2)     /* fs could be optimized */
@@ -624,6 +634,9 @@ _("Only one of the options -n or -y may be specified.\n"));
        if (stdout_isatty && !progress_fp)
                progress_fp = fdopen(1, "w+");
 
+       if (getenv("SERVICE_MODE"))
+               is_service = true;
+
        /* Find the mount record for the passed-in argument. */
        if (stat(argv[optind], &ctx.mnt_sb) < 0) {
                fprintf(stderr,
@@ -729,5 +742,24 @@ _("%s: %llu warnings found.\n"),
        free(ctx.blkdev);
        free(ctx.mntpoint);
 
+       /*
+        * If we're being run as a service, the return code must fit the LSB
+        * init script action error guidelines, which is to say that we
+        * compress all errors to 1 ("generic or unspecified error", LSB 5.0
+        * section 22.2) and hope the admin will scan the log for what
+        * actually happened.
+        *
+        * We have to sleep 2 seconds here because journald uses the pid to
+        * connect our log messages to the systemd service.  This is critical
+        * for capturing all the log messages if the scrub fails, because the
+        * fail service uses the service name to gather log messages for the
+        * error report.
+        */
+       if (is_service) {
+               sleep(2);
+               if (ret != SCRUB_RET_SUCCESS)
+                       return 1;
+       }
+
        return ret;
 }
diff --git a/scrub/xfs_scrub@.service.in b/scrub/xfs_scrub@.service.in
new file mode 100644 (file)
index 0000000..c14f813
--- /dev/null
@@ -0,0 +1,20 @@
+# Template unit that runs xfs_scrub once against a single filesystem.
+# The instance name (%I) is used as the working directory and as the
+# path argument handed to xfs_scrub.
+[Unit]
+Description=Online XFS Metadata Check for %I
+# Hand the same instance to the failure reporting unit if the scrub fails.
+OnFailure=xfs_scrub_fail@%i.service
+Documentation=man:xfs_scrub(8)
+
+[Service]
+Type=oneshot
+WorkingDirectory=%I
+PrivateNetwork=true
+ProtectSystem=full
+ProtectHome=read-only
+PrivateTmp=yes
+AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+NoNewPrivileges=yes
+User=nobody
+IOSchedulingClass=idle
+CPUSchedulingPolicy=idle
+# SERVICE_MODE makes xfs_scrub sleep briefly before exiting and collapse
+# every failure exit code to 1.
+Environment=SERVICE_MODE=1
+ExecStart=@sbindir@/xfs_scrub @scrub_args@ %I
+SyslogIdentifier=%N
diff --git a/scrub/xfs_scrub_all.cron.in b/scrub/xfs_scrub_all.cron.in
new file mode 100644 (file)
index 0000000..3dea929
--- /dev/null
@@ -0,0 +1 @@
+# Run xfs_scrub_all as root at 03:10 on Sundays, but only when
+# /run/systemd/system does not exist.
+# NOTE(review): presumably this defers to xfs_scrub_all.timer whenever
+# systemd is the running init -- confirm.
+10 3 * * 0 root test -e /run/systemd/system || @sbindir@/xfs_scrub_all
index 773864413eaef091f7cc6d6b9d23954bebe91e73..fff05da3ea7a5cddd483a343d3b21e6e6e7a0573 100644 (file)
@@ -25,10 +25,19 @@ import json
 import threading
 import time
 import sys
+import os
 
 retcode = 0
 terminate = False
 
+def DEVNULL():
+       '''Return a /dev/null handle that subprocess can write to.
+
+       Uses subprocess.DEVNULL when the running Python provides it and
+       falls back to opening os.devnull for binary writing otherwise.'''
+       try:
+               from subprocess import DEVNULL as devnull
+       except ImportError:
+               devnull = open(os.devnull, 'wb')
+       return devnull
+
 def find_mounts():
        '''Map mountpoints to physical disks.'''
 
@@ -55,6 +64,13 @@ def find_mounts():
                                fs[mnt] = set([lastdisk])
        return fs
 
+def kill_systemd(unit, proc):
+       '''Terminate proc, then run "systemctl stop" on unit and wait for
+       that command to finish.'''
+       proc.terminate()
+       stopper = subprocess.Popen(['systemctl', 'stop', unit])
+       stopper.wait()
+
 def run_killable(cmd, stdout, killfuncs, kill_fn):
        '''Run a killable program.  Returns program retcode or -1 if we can't start it.'''
        try:
@@ -81,6 +97,19 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
                if terminate:
                        return
 
+               # Try it the systemd way
+               cmd=['systemctl', 'start', 'xfs_scrub@%s' % mnt]
+               ret = run_killable(cmd, DEVNULL(), killfuncs, \
+                               lambda proc: kill_systemd('xfs_scrub@%s' % mnt, proc))
+               if ret == 0 or ret == 1:
+                       print("Scrubbing %s done, (err=%d)" % (mnt, ret))
+                       sys.stdout.flush()
+                       retcode |= ret
+                       return
+
+               if terminate:
+                       return
+
                # Invoke xfs_scrub manually
                cmd=['@sbindir@/xfs_scrub', '@scrub_args@', mnt]
                ret = run_killable(cmd, None, killfuncs, \
@@ -112,6 +141,17 @@ def main():
 
        fs = find_mounts()
 
+       # Tail the journal if we ourselves aren't a service...
+       journalthread = None
+       if 'SERVICE_MODE' not in os.environ:
+               try:
+                       cmd=['journalctl', '--no-pager', '-q', '-S', 'now', \
+                                       '-f', '-u', 'xfs_scrub@*', '-o', \
+                                       'cat']
+                       journalthread = subprocess.Popen(cmd)
+               except:
+                       pass
+
        # Schedule scrub jobs...
        running_devs = set()
        killfuncs = set()
@@ -148,6 +188,15 @@ def main():
                        fs = []
                cond.release()
 
+       if journalthread is not None:
+               journalthread.terminate()
+
+       # See the service mode comments in xfs_scrub.c for why we do this.
+       if 'SERVICE_MODE' in os.environ:
+               time.sleep(2)
+               if retcode != 0:
+                       retcode = 1
+
        sys.exit(retcode)
 
 if __name__ == '__main__':
diff --git a/scrub/xfs_scrub_all.service.in b/scrub/xfs_scrub_all.service.in
new file mode 100644 (file)
index 0000000..66f82fc
--- /dev/null
@@ -0,0 +1,10 @@
+# Runs xfs_scrub_all once.
+[Unit]
+Description=Online XFS Metadata Check for All Filesystems
+# NOTE(review): presumably skips the run while on battery power -- see
+# systemd.unit(5) for the exact ConditionACPower semantics.
+ConditionACPower=true
+Documentation=man:xfs_scrub_all(8)
+
+[Service]
+Type=oneshot
+# SERVICE_MODE tells xfs_scrub_all not to tail the journal itself, to
+# sleep briefly before exiting, and to collapse any nonzero exit code to 1.
+Environment=SERVICE_MODE=1
+ExecStart=@sbindir@/xfs_scrub_all
+SyslogIdentifier=xfs_scrub_all
diff --git a/scrub/xfs_scrub_all.timer b/scrub/xfs_scrub_all.timer
new file mode 100644 (file)
index 0000000..2e4a33b
--- /dev/null
@@ -0,0 +1,11 @@
+# NOTE(review): no Unit= is given, so this presumably activates the
+# service sharing this timer's name (xfs_scrub_all.service) -- confirm
+# against systemd.timer(5).
+[Unit]
+Description=Periodic XFS Online Metadata Check for All Filesystems
+
+[Timer]
+# Run on Sunday at 3:10am, to avoid running afoul of DST changes
+OnCalendar=Sun *-*-* 03:10:00
+# See systemd.timer(5) for the exact semantics of the two settings below
+# (start-time jitter and catching up on runs missed while powered off).
+RandomizedDelaySec=60
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/scrub/xfs_scrub_fail b/scrub/xfs_scrub_fail
new file mode 100755 (executable)
index 0000000..36dd50e
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Email logs of failed xfs_scrub unit runs
+#
+# Usage: xfs_scrub_fail <recipient> <mountpoint>
+#
+# Exits 0 without sending anything if either argument is empty, and
+# exits 1 if the mailer program is missing or not executable.
+
+mailer=/usr/sbin/sendmail
+recipient="$1"
+test -z "${recipient}" && exit 0
+mntpoint="$2"
+test -z "${mntpoint}" && exit 0
+hostname="$(hostname -f 2>/dev/null)"
+test -z "${hostname}" && hostname="${HOSTNAME}"
+# Never emit a From: address with an empty host part.
+test -z "${hostname}" && hostname="localhost"
+if [ ! -x "${mailer}" ]; then
+       # Diagnostics belong on stderr, not stdout.
+       echo "${mailer}: Mailer program not found." >&2
+       exit 1
+fi
+
+# Headers and preamble come from the here-document; the body is the full
+# status output of the scrub unit for this mountpoint.
+(cat << ENDL
+To: ${recipient}
+From: <xfs_scrub@${hostname}>
+Subject: xfs_scrub failure on ${mntpoint}
+
+So sorry, the automatic xfs_scrub of ${mntpoint} on ${hostname} failed.
+
+A log of what happened follows:
+ENDL
+systemctl status --full --lines 4294967295 "xfs_scrub@${mntpoint}") | "${mailer}" -t -i
diff --git a/scrub/xfs_scrub_fail@.service.in b/scrub/xfs_scrub_fail@.service.in
new file mode 100644 (file)
index 0000000..785f881
--- /dev/null
@@ -0,0 +1,10 @@
+# Template unit that runs the xfs_scrub_fail helper for one instance.
+[Unit]
+Description=Online XFS Metadata Check Failure Reporting for %I
+
+[Service]
+Type=oneshot
+# Recipient of the failure report; passed to the helper as its first
+# argument, followed by the instance name (%I) as the mountpoint.
+Environment=EMAIL_ADDR=root
+ExecStart=@pkg_lib_dir@/@pkg_name@/xfs_scrub_fail "${EMAIL_ADDR}" %I
+User=mail
+Group=mail
+# NOTE(review): presumably needed so the helper's "systemctl status" call
+# can read the journal -- confirm.
+SupplementaryGroups=systemd-journal