]>
Commit | Line | Data |
---|---|---|
9d50331a | 1 | #!/usr/bin/python3 |
f1dca11c | 2 | |
959ef981 | 3 | # SPDX-License-Identifier: GPL-2.0+ |
f1dca11c DW |
4 | # Copyright (C) 2018 Oracle. All rights reserved. |
5 | # | |
6 | # Author: Darrick J. Wong <darrick.wong@oracle.com> | |
959ef981 DC |
7 | |
8 | # Run online scrubbers in parallel, but avoid thrashing. | |
f1dca11c DW |
9 | |
10 | import subprocess | |
11 | import json | |
12 | import threading | |
13 | import time | |
14 | import sys | |
824b5807 | 15 | import os |
3dd91472 | 16 | import argparse |
f1dca11c DW |
17 | |
# Aggregate exit status: each scrub's return code is OR-ed into this value.
retcode = 0
# Set to True when the user interrupts us; workers check it before they
# launch anything new.
terminate = False
20 | ||
824b5807 DW |
def DEVNULL():
    '''Return a /dev/null sink that subprocess can write to.

    Prefers the subprocess.DEVNULL constant; if this interpreter's
    subprocess module does not provide it, opens os.devnull for binary
    writing instead.
    '''
    sink = getattr(subprocess, 'DEVNULL', None)
    if sink is None:
        sink = open(os.devnull, 'wb')
    return sink
28 | ||
f1dca11c DW |
def find_mounts():
    '''Map mountpoints to physical disks.

    Runs lsblk in JSON mode and walks the block device tree that it
    reports.

    Returns a dict that maps the mountpoint of every mounted XFS
    filesystem to the set of kernel names of the top-level block devices
    it was found under.  The dict is empty if lsblk cannot be started,
    exits with an error, or prints something that is not valid JSON.
    '''
    def find_xfs_mounts(bdev, fs, lastdisk):
        '''Attach lastdisk to each fs found under bdev.'''
        # Use .get() so that an entry lacking a column is skipped instead
        # of raising KeyError.
        mnt = bdev.get('mountpoint')
        if bdev.get('fstype') == 'xfs' and mnt is not None:
            fs.setdefault(mnt, set()).add(lastdisk)
        # "children" may be absent (or null) for leaf devices.
        for child in bdev.get('children') or ():
            find_xfs_mounts(child, fs, lastdisk)

    fs = {}
    cmd = ['lsblk', '-o', 'NAME,KNAME,TYPE,FSTYPE,MOUNTPOINT', '-J']
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    except OSError:
        # lsblk is missing or not executable, so there is nothing to map.
        return fs

    # communicate() drains the pipe while it waits.  Calling wait() before
    # reading can deadlock once lsblk fills the pipe buffer.
    output, _ = proc.communicate()
    if proc.returncode != 0:
        return fs

    try:
        # sys.stdout.encoding can be None when stdout is not a terminal.
        text = output.decode(sys.stdout.encoding or 'utf-8')
        bdevdata = json.loads(text)
    except ValueError:
        # Covers both undecodable bytes and malformed JSON.
        return fs

    # The lsblk output had better be in disks-then-partitions order
    for bdev in bdevdata['blockdevices']:
        find_xfs_mounts(bdev, fs, bdev['kname'])

    return fs
60 | ||
824b5807 DW |
def kill_systemd(unit, proc):
    '''Terminate proc, then have systemctl stop the named unit.

    Blocks until the "systemctl stop" command has exited.
    '''
    proc.terminate()
    stopper = subprocess.Popen(['systemctl', 'stop', unit])
    stopper.wait()
67 | ||
f1dca11c DW |
def run_killable(cmd, stdout, killfuncs, kill_fn):
    '''Run a killable program.

    cmd is the argv list to execute and stdout is handed to
    subprocess.Popen unchanged.  While the program runs, a zero-argument
    callable that invokes kill_fn(proc) is kept in the killfuncs set so
    that other code can stop the program.

    Returns the program's return code, or -1 if we can't start it or
    waiting for it fails.
    '''
    try:
        proc = subprocess.Popen(cmd, stdout=stdout)
    except Exception:
        # Not a bare "except:", so SystemExit and KeyboardInterrupt still
        # propagate.
        return -1

    def real_kill_fn():
        kill_fn(proc)

    killfuncs.add(real_kill_fn)
    try:
        proc.wait()
    except Exception:
        return -1
    finally:
        # discard() rather than remove(): whoever is shutting things down
        # may already have popped the callback from the set.
        killfuncs.discard(real_kill_fn)
    return proc.returncode
82 | ||
07c6fd59 DW |
# systemd doesn't like unit instance names with slashes in them, so it
# replaces them with dashes when it invokes the service.  However, it's not
# smart enough to convert the dashes to something else, so when it unescapes
# the instance name to feed to xfs_scrub, it turns all dashes into slashes.
# "/moo-cow" becomes "-moo-cow" becomes "/moo/cow", which is wrong.  systemd
# actually /can/ escape the dashes correctly if it is told that this is a path
# (and not a unit name), but it didn't do this prior to January 2017, so fix
# this for them.
#
# systemd path escaping also drops the initial slash so we add that back in so
# that log messages from the service units preserve the full path and users can
# look up log messages using full paths.  However, for "/" the escaping rules
# do /not/ drop the initial slash, so we have to special-case that here.
07c6fd59 DW |
def systemd_escape(path):
    '''Escape a path so that systemd does not mangle the unit instance name.

    Returns '-' for the root directory.  For any other path, returns '-'
    followed by the first line printed by "systemd-escape --path".  If
    systemd-escape cannot be run, fails, or prints nothing, the path is
    returned unchanged.
    '''
    if path == '/':
        return '-'

    cmd = ['systemd-escape', '--path', path]
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # communicate() reads the pipe while waiting for the exit status.
        output, _ = proc.communicate()
        # sys.stdout.encoding can be None when stdout is not a terminal.
        lines = output.decode(sys.stdout.encoding or 'utf-8').splitlines()
    except Exception:
        return path

    # Never fall off the end and hand the caller None: without usable
    # output, the unescaped path is the best name we have.
    if proc.returncode != 0 or not lines:
        return path
    return '-' + lines[0].strip()
109 | ||
f1dca11c DW |
def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
    '''Run a scrub process.

    Scrubs the filesystem mounted at mnt, first by starting the
    xfs_scrub@ systemd unit and, if that does not end with status 0 or 1,
    by invoking the xfs_scrub binary directly.  The result is OR-ed into
    the module-level retcode.

    cond is the condition variable that the scheduler waits on,
    running_devs is the set of devices currently being scrubbed, mntdevs
    is the set of devices under this mount, and killfuncs collects the
    callbacks that stop whatever this function is running.

    However this function ends, mntdevs is removed from running_devs and
    cond is notified.
    '''
    global retcode, terminate

    print("Scrubbing %s..." % mnt)
    sys.stdout.flush()

    try:
        if terminate:
            return

        # Try it the systemd way.  Build the escaped unit name once so
        # that the unit we stop on termination is the one we started.
        unit = 'xfs_scrub@%s' % systemd_escape(mnt)
        cmd = ['systemctl', 'start', unit]
        ret = run_killable(cmd, DEVNULL(), killfuncs,
                           lambda proc: kill_systemd(unit, proc))
        if ret in (0, 1):
            print("Scrubbing %s done, (err=%d)" % (mnt, ret))
            sys.stdout.flush()
            retcode |= ret
            return

        if terminate:
            return

        # Invoke xfs_scrub manually.  The @...@ tokens are presumably
        # substituted at build time; the argument string is split so that
        # each option becomes its own argv element.
        cmd = ['@sbindir@/xfs_scrub'] + '@scrub_args@'.split() + [mnt]
        ret = run_killable(cmd, None, killfuncs,
                           lambda proc: proc.terminate())
        if ret >= 0:
            print("Scrubbing %s done, (err=%d)" % (mnt, ret))
            sys.stdout.flush()
            retcode |= ret
            return

        if terminate:
            return

        print("Unable to start scrub tool.")
        sys.stdout.flush()
    finally:
        # Free our devices and wake the scheduler under the same lock.
        with cond:
            running_devs.difference_update(mntdevs)
            cond.notify()
154 | ||
def main():
    '''Find mounts, schedule scrub runs.

    Parses the command line (-V prints the version and exits), maps the
    mounted XFS filesystems to their disks, and runs one scrub thread per
    filesystem so that no two running scrubs share a disk.  Ctrl-C stops
    the running scrubs and abandons the ones not yet started.

    Exits through sys.exit() with the combined scrub status, collapsed to
    0 or 1 when SERVICE_MODE is set in the environment.
    '''
    global retcode, terminate

    parser = argparse.ArgumentParser(
        description="Scrub all mounted XFS filesystems.")
    parser.add_argument("-V", help="Report version and exit.",
                        action="store_true")
    args = parser.parse_args()

    if args.V:
        print("xfs_scrub_all version @pkg_version@")
        sys.exit(0)

    fs = find_mounts()

    # Tail the journal if we ourselves aren't a service...
    journalthread = None
    if 'SERVICE_MODE' not in os.environ:
        try:
            cmd = ['journalctl', '--no-pager', '-q', '-S', 'now',
                   '-f', '-u', 'xfs_scrub@*', '-o',
                   'cat']
            journalthread = subprocess.Popen(cmd)
        except OSError:
            # Best effort: carry on without log output if journalctl
            # cannot be started.
            pass

    running_devs = set()
    killfuncs = set()
    cond = threading.Condition()
    workers = []

    def start_scrub(mnt, devs):
        '''Start a worker thread that scrubs mnt.'''
        a = (mnt, cond, running_devs, devs, killfuncs)
        worker = threading.Thread(target=run_scrub, args=a)
        workers.append(worker)
        worker.start()

    def kill_all():
        '''Flag termination and stop everything that is running.'''
        global terminate
        terminate = True
        print("Terminating...")
        sys.stdout.flush()
        while killfuncs:
            fn = killfuncs.pop()
            fn()

    # Schedule scrub jobs...
    while fs:
        # Hold the condition while scheduling so that a worker which
        # finishes quickly cannot notify before we are waiting for it.
        with cond:
            if not running_devs:
                mnt, devs = fs.popitem()
                running_devs.update(devs)
                start_scrub(mnt, devs)
            # Start every remaining filesystem whose disks are all idle.
            for mnt in list(fs):
                devs = fs[mnt]
                if running_devs.isdisjoint(devs):
                    running_devs.update(devs)
                    del fs[mnt]
                    start_scrub(mnt, devs)
            try:
                cond.wait()
            except KeyboardInterrupt:
                kill_all()
                fs = {}

    # The queue can empty while scrubs are still running.  Wait for them
    # so that the journal tail and the exit status cover every scrub.
    try:
        for worker in workers:
            worker.join()
    except KeyboardInterrupt:
        kill_all()
        for worker in workers:
            worker.join()

    if journalthread is not None:
        journalthread.terminate()
        journalthread.wait()

    # See the service mode comments in xfs_scrub.c for why we do this.
    if 'SERVICE_MODE' in os.environ:
        time.sleep(2)
        if retcode != 0:
            retcode = 1

    sys.exit(retcode)
232 | ||
# Only schedule scrubs when run as a script, not when imported.
if __name__ == "__main__":
    main()