]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/xfs_scrub_all.in
xfs_scrub_all: escape service names consistently
[thirdparty/xfsprogs-dev.git] / scrub / xfs_scrub_all.in
CommitLineData
9d50331a 1#!/usr/bin/python3
f1dca11c 2
8d318d62 3# SPDX-License-Identifier: GPL-2.0-or-later
52520522 4# Copyright (C) 2018-2024 Oracle. All rights reserved.
f1dca11c 5#
8d318d62 6# Author: Darrick J. Wong <djwong@kernel.org>
959ef981
DC
7
8# Run online scrubbers in parallel, but avoid thrashing.
f1dca11c
DW
9
10import subprocess
11import json
12import threading
13import time
14import sys
824b5807 15import os
3dd91472 16import argparse
f1dca11c
DW
17
# Exit status accumulated from the scrub runs; workers OR their results in.
retcode = 0

# Set when the user interrupts the program so workers stop launching scrubs.
terminate = False
20
824b5807
DW
def DEVNULL():
    '''Provide a /dev/null target in a form that subprocess can write to.'''
    try:
        from subprocess import DEVNULL as null_sink
    except ImportError:
        # This interpreter's subprocess module has no DEVNULL constant, so
        # hand back an open handle on the null device instead.
        null_sink = open(os.devnull, 'wb')
    return null_sink
28
f1dca11c
DW
def find_mounts():
    '''Map mountpoints to physical disks.

    Runs lsblk with JSON output and walks the resulting device tree.

    Returns a dict whose keys are the mountpoints of mounted XFS filesystems
    and whose values are sets holding the kernel name (KNAME) of every
    top-level block device that the filesystem was found underneath.  An
    empty dict is returned if lsblk exits with a nonzero status.
    '''
    def find_xfs_mounts(bdev, fs, lastdisk):
        '''Attach lastdisk to each fs found under bdev.'''
        if bdev['fstype'] == 'xfs' and bdev['mountpoint'] is not None:
            fs.setdefault(bdev['mountpoint'], set()).add(lastdisk)
        # Leaf devices carry no 'children' key at all.
        for child in bdev.get('children', ()):
            find_xfs_mounts(child, fs, lastdisk)

    fs = {}
    cmd = ['lsblk', '-o', 'NAME,KNAME,TYPE,FSTYPE,MOUNTPOINT', '-J']
    result = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # Drain the pipe while waiting for lsblk.  Waiting first and reading
    # afterwards can deadlock once the listing outgrows the pipe buffer.
    output, _ = result.communicate()
    if result.returncode != 0:
        return fs

    # sys.stdout.encoding can be None if stdout has been replaced.
    encoding = sys.stdout.encoding or 'utf-8'
    bdevdata = json.loads(output.decode(encoding))

    # The lsblk output had better be in disks-then-partitions order
    for bdev in bdevdata['blockdevices']:
        find_xfs_mounts(bdev, fs, bdev['kname'])

    return fs
60
824b5807
DW
def kill_systemd(unit, proc):
    '''Terminate proc, then have systemctl stop unit and wait for it.'''
    proc.terminate()
    stopper = subprocess.Popen(['systemctl', 'stop', unit])
    stopper.wait()
67
f1dca11c
DW
def run_killable(cmd, stdout, killfuncs, kill_fn):
    '''Run a killable program.

    cmd is the argument list to execute and stdout is handed straight to
    subprocess.Popen.  While the program runs, a zero-argument callable that
    invokes kill_fn(proc) is kept in the killfuncs set so that another
    thread can stop the program; it is removed again once the program ends.

    Returns the program's return code, or -1 if we can't start it or waiting
    for it fails.
    '''
    try:
        proc = subprocess.Popen(cmd, stdout=stdout)
    except Exception:
        return -1

    real_kill_fn = lambda: kill_fn(proc)
    killfuncs.add(real_kill_fn)
    try:
        proc.wait()
    except Exception:
        return -1
    finally:
        # The interrupt handler may already have popped the callback, so
        # its absence is not an error.
        killfuncs.discard(real_kill_fn)
    return proc.returncode
82
07c6fd59
DW
# systemd doesn't like unit instance names with slashes in them, so it
# replaces them with dashes when it invokes the service.  However, it's not
# smart enough to convert the dashes to something else, so when it unescapes
# the instance name to feed to xfs_scrub, it turns all dashes into slashes.
# "/moo-cow" becomes "-moo-cow" becomes "/moo/cow", which is wrong.  systemd
# actually /can/ escape the dashes correctly if it is told that this is a path
# (and not a unit name), but it didn't do this prior to January 2017, so fix
# this for them.
#
# systemd path escaping also drops the initial slash so we add that back in so
# that log messages from the service units preserve the full path and users can
# look up log messages using full paths.  However, for "/" the escaping rules
# do /not/ drop the initial slash, so we have to special-case that here.
def path_to_service(path):
    '''Escape a mount path into the name of its xfs_scrub@ service instance.

    The path is escaped with "systemd-escape --path" and the result is
    prefixed with "xfs_scrub@-"; the root directory is special-cased.

    Returns the service name, or None if systemd-escape cannot be run, exits
    with a nonzero status, or produces no usable output.
    '''
    if path == '/':
        return 'xfs_scrub@-'

    cmd = ['systemd-escape', '--path', path]
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # Read the pipe while waiting so the child can never block on it.
        output, _ = proc.communicate()
        if proc.returncode != 0 or not output:
            return None
        # Only the first line of output is meaningful.
        first_line = output.split(b'\n', 1)[0]
        escaped = first_line.decode(sys.stdout.encoding or 'utf-8').strip()
    except (OSError, ValueError):
        # OSError: the tool is missing or not executable.  ValueError: the
        # path cannot be passed as an argument or the output cannot be
        # decoded.
        return None
    return 'xfs_scrub@-%s' % escaped
07c6fd59 109
f1dca11c
DW
def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
    '''Run a scrub process.

    Scrubs the filesystem mounted at mnt, first by starting its systemd
    service and, if that does not work out, by invoking xfs_scrub directly.
    The result is OR-ed into the global retcode.  Nothing new is launched
    once the global terminate flag is set.

    cond is the condition variable used to wake the scheduler, running_devs
    is the shared set of busy devices, mntdevs is the set of devices this
    scrub occupies, and killfuncs is the shared set of kill callbacks that
    is passed on to run_killable.

    On every exit path mntdevs is removed from running_devs and cond is
    notified.
    '''
    global retcode, terminate

    print("Scrubbing %s..." % mnt)
    sys.stdout.flush()

    try:
        if terminate:
            return

        # Try it the systemd way
        svcname = path_to_service(mnt)
        if svcname is not None:
            cmd = ['systemctl', 'start', svcname]
            ret = run_killable(cmd, DEVNULL(), killfuncs,
                    lambda proc: kill_systemd(svcname, proc))
            if ret == 0 or ret == 1:
                print("Scrubbing %s done, (err=%d)" % (mnt, ret))
                sys.stdout.flush()
                retcode |= ret
                return

        # Don't fall back to a manual run if we were told to stop.
        if terminate:
            return

        # Invoke xfs_scrub manually
        cmd = ['@sbindir@/xfs_scrub', '@scrub_args@', mnt]
        ret = run_killable(cmd, None, killfuncs,
                lambda proc: proc.terminate())
        if ret >= 0:
            print("Scrubbing %s done, (err=%d)" % (mnt, ret))
            sys.stdout.flush()
            retcode |= ret
            return

        if terminate:
            return

        print("Unable to start scrub tool.")
        sys.stdout.flush()
    finally:
        # Release our devices and wake the scheduler.
        with cond:
            running_devs.difference_update(mntdevs)
            cond.notify()
156
def main():
    '''Find mounts, schedule scrub runs.

    Parses the command line, discovers the mounted XFS filesystems, and runs
    one scrub thread per filesystem such that no two concurrently running
    scrubs share a device.  Unless SERVICE_MODE is set in the environment, a
    journalctl process tails the xfs_scrub@ units while scrubs run.

    Exits the process with the accumulated scrub status; in service mode any
    nonzero status is reported as 1.
    '''
    global retcode, terminate

    parser = argparse.ArgumentParser(
            description = "Scrub all mounted XFS filesystems.")
    parser.add_argument("-V", help = "Report version and exit.",
            action = "store_true")
    args = parser.parse_args()

    if args.V:
        print("xfs_scrub_all version @pkg_version@")
        sys.exit(0)

    fs = find_mounts()

    # Tail the journal if we ourselves aren't a service...
    journalthread = None
    if 'SERVICE_MODE' not in os.environ:
        try:
            cmd = ['journalctl', '--no-pager', '-q', '-S', 'now',
                    '-f', '-u', 'xfs_scrub@*', '-o',
                    'cat']
            journalthread = subprocess.Popen(cmd)
        except OSError:
            # Tailing the journal is best effort; scrub without it.
            pass

    running_devs = set()
    killfuncs = set()
    cond = threading.Condition()
    workers = []

    def thr(mnt, devs):
        '''Start a scrub thread for mnt and remember it for joining.'''
        a = (mnt, cond, running_devs, devs, killfuncs)
        worker = threading.Thread(target = run_scrub, args = a)
        workers.append(worker)
        worker.start()

    def interrupted():
        '''Flag termination and kill every scrubber that is still running.'''
        global terminate
        terminate = True
        print("Terminating...")
        sys.stdout.flush()
        while len(killfuncs) > 0:
            fn = killfuncs.pop()
            fn()

    # Schedule scrub jobs...
    #
    # Hold the condition for the whole scheduling pass.  Workers need it to
    # notify us, so a worker that finishes early cannot deliver its wake-up
    # before we are actually waiting for it.
    with cond:
        while len(fs) > 0:
            if len(running_devs) == 0:
                mnt, devs = fs.popitem()
                running_devs.update(devs)
                thr(mnt, devs)
            poppers = set()
            for mnt in fs:
                devs = fs[mnt]
                if running_devs.isdisjoint(devs):
                    running_devs.update(devs)
                    poppers.add(mnt)
                    thr(mnt, devs)
            for p in poppers:
                fs.pop(p)
            try:
                cond.wait()
            except KeyboardInterrupt:
                interrupted()
                fs = {}

    # Everything is scheduled, but scrubs may still be running.  Wait for
    # all of them so that retcode is complete before we look at it and the
    # journal tail keeps going until the last scrub is done.
    for worker in workers:
        while worker.is_alive():
            try:
                worker.join()
            except KeyboardInterrupt:
                interrupted()

    if journalthread is not None:
        journalthread.terminate()
        journalthread.wait()

    # See the service mode comments in xfs_scrub.c for why we do this.
    if 'SERVICE_MODE' in os.environ:
        time.sleep(2)
        if retcode != 0:
            retcode = 1

    sys.exit(retcode)
234
# Only run the scheduler when executed as a script, not when imported.
if __name__ == "__main__":
    main()