]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/xfs_scrub_all.in
xfs_scrub_all: fix systemd escaping again
[thirdparty/xfsprogs-dev.git] / scrub / xfs_scrub_all.in
CommitLineData
9d50331a 1#!/usr/bin/python3
f1dca11c 2
959ef981 3# SPDX-License-Identifier: GPL-2.0+
f1dca11c
DW
4# Copyright (C) 2018 Oracle. All rights reserved.
5#
6# Author: Darrick J. Wong <darrick.wong@oracle.com>
959ef981
DC
7
8# Run online scrubbers in parallel, but avoid thrashing.
f1dca11c
DW
9
10import subprocess
11import json
12import threading
13import time
14import sys
824b5807 15import os
3dd91472 16import argparse
f1dca11c
DW
17
# Shared state, touched by the scrub worker threads and by main():
#   retcode   - bitwise OR of the exit codes of every finished scrub
#   terminate - set once the user interrupts us; workers check it before
#               launching anything new
retcode, terminate = 0, False
20
824b5807
DW
def DEVNULL():
    '''Hand back something subprocess can use as a writable /dev/null.

    The subprocess.DEVNULL constant is preferred.  If this interpreter's
    subprocess module does not export it, os.devnull is opened for binary
    writing and that file object is returned instead.
    '''
    try:
        from subprocess import DEVNULL as sink
    except ImportError:
        sink = open(os.devnull, 'wb')
    return sink
28
f1dca11c
DW
def find_mounts():
    '''Map mountpoints to physical disks.

    Runs lsblk in JSON mode and walks the reported block devices in the
    order given, remembering the most recent device of type 'disk' or
    'loop'.  Every mounted XFS filesystem that follows is attributed to
    that device.

    Returns a dict that maps each XFS mountpoint to the set of kernel
    device names recorded for it.  The dict is empty when lsblk cannot be
    started, exits with a nonzero status, or prints nothing.
    '''
    fs = {}
    cmd = ['lsblk', '-o', 'KNAME,TYPE,FSTYPE,MOUNTPOINT', '-J']
    try:
        result = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    except OSError:
        # No usable lsblk: treat it exactly like lsblk reporting failure.
        return fs

    # communicate() drains the pipe while it waits for the child.  Waiting
    # first and reading afterwards can deadlock once the child has filled
    # the pipe buffer.
    raw, _ = result.communicate()
    if result.returncode != 0 or not raw.strip():
        return fs

    output = raw.decode(sys.stdout.encoding or 'utf-8')
    bdevdata = json.loads(output)

    # The lsblk output had better be in disks-then-partitions order
    lastdisk = None
    for bdev in bdevdata.get('blockdevices') or []:
        if bdev['type'] in ('disk', 'loop'):
            lastdisk = bdev['kname']
        if bdev['fstype'] != 'xfs':
            continue
        mnt = bdev['mountpoint']
        if mnt is None:
            continue
        # If no disk/loop entry has been seen yet there is nothing to
        # attribute this filesystem to; fall back to its own device name
        # instead of tripping over an unset variable.
        disk = lastdisk if lastdisk is not None else bdev['kname']
        fs.setdefault(mnt, set()).add(disk)
    return fs
54
824b5807
DW
def kill_systemd(unit, proc):
    '''Stop a scrub that is running as a systemd unit.

    First terminates the process object handed in by the caller, then
    runs "systemctl stop" on the named unit and blocks until that
    command has exited.
    '''
    proc.terminate()
    stopper = subprocess.Popen(['systemctl', 'stop', unit])
    stopper.wait()
61
f1dca11c
DW
def run_killable(cmd, stdout, killfuncs, kill_fn):
    '''Run a killable program.

    cmd is the argv list to execute and stdout is passed straight to
    subprocess.Popen.  While the program runs, a zero-argument callable
    that invokes kill_fn(proc) is kept in the killfuncs set so that
    another thread can stop the program; it is taken out again once the
    program is no longer being waited for.

    Returns program retcode or -1 if we can't start it.
    '''
    real_kill_fn = None
    try:
        proc = subprocess.Popen(cmd, stdout=stdout)
        real_kill_fn = lambda: kill_fn(proc)
        killfuncs.add(real_kill_fn)
        proc.wait()
        return proc.returncode
    except Exception:
        # Deliberately not a bare "except:", which would also swallow
        # KeyboardInterrupt and SystemExit.
        return -1
    finally:
        # The interrupt handler may already have popped the callback, so
        # use discard(), which does not mind a missing entry.
        killfuncs.discard(real_kill_fn)
76
07c6fd59
DW
77# systemd doesn't like unit instance names with slashes in them, so it
78# replaces them with dashes when it invokes the service. However, it's not
79# smart enough to convert the dashes to something else, so when it unescapes
80# the instance name to feed to xfs_scrub, it turns all dashes into slashes.
81# "/moo-cow" becomes "-moo-cow" becomes "/moo/cow", which is wrong. systemd
82# actually /can/ escape the dashes correctly if it is told that this is a path
83# (and not a unit name), but it didn't do this prior to January 2017, so fix
84# this for them.
29370436
DW
85#
86# systemd path escaping also drops the initial slash so we add that back in so
87# that log messages from the service units preserve the full path and users can
88# look up log messages using full paths. However, for "/" the escaping rules
89# do /not/ drop the initial slash, so we have to special-case that here.
07c6fd59
DW
def systemd_escape(path):
    '''Escape a path to avoid mangled systemd mangling.

    "/" is special-cased to "-".  Any other path is handed to
    "systemd-escape --path" and the first line of its output is returned
    with a "-" put in front.

    If systemd-escape cannot be run, fails, or prints nothing usable, the
    path is returned unchanged rather than None, so callers always get a
    string to build a unit name from.
    '''
    if path == '/':
        return '-'

    cmd = ['systemd-escape', '--path', path]
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # communicate() reads the pipe while waiting for the child.
        out, _ = proc.communicate()
        lines = out.decode(sys.stdout.encoding or 'utf-8').splitlines()
    except (OSError, ValueError):
        # OSError: systemd-escape missing or not executable.
        # ValueError: output that cannot be decoded.
        return path

    escaped = lines[0].strip() if lines else ''
    if proc.returncode != 0 or not escaped:
        return path
    return '-' + escaped
103
f1dca11c
DW
def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
    '''Run a scrub process.

    mnt is the mountpoint to scrub.  The scrub is first attempted by
    starting the xfs_scrub@ systemd unit for the mountpoint.  If that does
    not end with status 0 or 1, xfs_scrub is invoked directly.  The exit
    status of whichever attempt counts is ORed into the global retcode.
    Nothing new is launched once the global terminate flag is set.

    However the function exits, mntdevs is removed from the running_devs
    set and cond is notified.  killfuncs is the set in which
    run_killable() registers the callbacks that can stop the running
    program.
    '''
    global retcode, terminate

    print("Scrubbing %s..." % mnt)
    sys.stdout.flush()

    try:
        if terminate:
            return

        # Try it the systemd way.  The kill callback must stop the same
        # (escaped) unit name that gets started here.
        unitname = 'xfs_scrub@%s' % systemd_escape(mnt)
        cmd = ['systemctl', 'start', unitname]
        ret = run_killable(cmd, DEVNULL(), killfuncs,
                lambda proc: kill_systemd(unitname, proc))
        if ret == 0 or ret == 1:
            print("Scrubbing %s done, (err=%d)" % (mnt, ret))
            sys.stdout.flush()
            retcode |= ret
            return

        if terminate:
            return

        # Invoke xfs_scrub manually.  The configured arguments are split
        # on whitespace so that several options become separate argv
        # entries and an empty setting adds no argument at all.
        cmd = ['@sbindir@/xfs_scrub']
        cmd += '@scrub_args@'.split()
        cmd.append(mnt)
        ret = run_killable(cmd, None, killfuncs,
                lambda proc: proc.terminate())
        if ret >= 0:
            print("Scrubbing %s done, (err=%d)" % (mnt, ret))
            sys.stdout.flush()
            retcode |= ret
            return

        if terminate:
            return

        print("Unable to start scrub tool.")
        sys.stdout.flush()
    finally:
        # Give the devices back and signal the condition.
        running_devs.difference_update(mntdevs)
        with cond:
            cond.notify()
148
def main():
    '''Find mounts, schedule scrub runs.

    Parses the command line (-V prints the version and exits), asks
    find_mounts() for the mounted XFS filesystems and scrubs each one in
    its own thread, never running two scrubs that share a device at the
    same time.  Unless SERVICE_MODE is set in the environment, journalctl
    is started to follow the xfs_scrub@ units while the scrubs run.

    The function always ends by calling sys.exit() with the accumulated
    scrub status.  In SERVICE_MODE any nonzero status is reported as 1.
    '''
    global retcode

    parser = argparse.ArgumentParser(
            description = "Scrub all mounted XFS filesystems.")
    parser.add_argument("-V", help = "Report version and exit.",
            action = "store_true")
    args = parser.parse_args()

    if args.V:
        print("xfs_scrub_all version @pkg_version@")
        sys.exit(0)

    fs = find_mounts()

    # Tail the journal if we ourselves aren't a service...
    journalthread = None
    if 'SERVICE_MODE' not in os.environ:
        try:
            cmd = ['journalctl', '--no-pager', '-q', '-S', 'now',
                    '-f', '-u', 'xfs_scrub@*', '-o',
                    'cat']
            journalthread = subprocess.Popen(cmd)
        except OSError:
            # Following the journal is a convenience only; carry on
            # without it if journalctl cannot be started.
            pass

    running_devs = set()
    killfuncs = set()
    cond = threading.Condition()
    workers = []

    def thr(mnt, devs):
        '''Start scrubbing mnt in a new thread and remember the thread.'''
        a = (mnt, cond, running_devs, devs, killfuncs)
        worker = threading.Thread(target = run_scrub, args = a)
        workers.append(worker)
        worker.start()

    def stop_everything():
        '''Flag termination and run every registered kill function.'''
        global terminate
        terminate = True
        print("Terminating...")
        sys.stdout.flush()
        while len(killfuncs) > 0:
            try:
                fn = killfuncs.pop()
            except KeyError:
                # A worker removed the last entry in the meantime.
                break
            fn()

    # Schedule scrub jobs...
    # The condition is held for the whole scheduling loop and is only
    # released inside cond.wait().  A worker has to take it before it
    # can notify, so a notification cannot slip by between the moment a
    # job is started and the moment we begin to wait.
    with cond:
        while len(fs) > 0:
            if len(running_devs) == 0:
                mnt, devs = fs.popitem()
                running_devs.update(devs)
                thr(mnt, devs)
            poppers = set()
            for mnt in fs:
                devs = fs[mnt]
                # Only start a scrub if none of its devices are busy.
                if running_devs.isdisjoint(devs):
                    running_devs.update(devs)
                    poppers.add(mnt)
                    thr(mnt, devs)
            for p in poppers:
                fs.pop(p)
            try:
                cond.wait()
            except KeyboardInterrupt:
                stop_everything()
                fs = {}

    # The loop above ends as soon as the last filesystem has been handed
    # to a thread, so scrubs may still be running.  Wait for all of them
    # so that retcode is complete before it is used below.
    for worker in workers:
        while worker.is_alive():
            try:
                worker.join()
            except KeyboardInterrupt:
                stop_everything()

    if journalthread is not None:
        journalthread.terminate()

    # See the service mode comments in xfs_scrub.c for why we do this.
    if 'SERVICE_MODE' in os.environ:
        time.sleep(2)
        if retcode != 0:
            retcode = 1

    sys.exit(retcode)
226
# Only run the scheduler when executed as a script, not when imported.
if __name__ == '__main__':
    main()