]>
Commit | Line | Data |
---|---|---|
9d50331a | 1 | #!/usr/bin/python3 |
f1dca11c DW |
2 | |
3 | # Run online scrubbers in parallel, but avoid thrashing. | |
4 | # | |
5 | # Copyright (C) 2018 Oracle. All rights reserved. | |
6 | # | |
7 | # Author: Darrick J. Wong <darrick.wong@oracle.com> | |
8 | # | |
9 | # This program is free software; you can redistribute it and/or | |
10 | # modify it under the terms of the GNU General Public License | |
11 | # as published by the Free Software Foundation; either version 2 | |
12 | # of the License, or (at your option) any later version. | |
13 | # | |
14 | # This program is distributed in the hope that it would be useful, | |
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | # GNU General Public License for more details. | |
18 | # | |
19 | # You should have received a copy of the GNU General Public License | |
20 | # along with this program; if not, write the Free Software Foundation, | |
21 | # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
22 | ||
23 | import subprocess | |
24 | import json | |
25 | import threading | |
26 | import time | |
27 | import sys | |
824b5807 | 28 | import os |
f1dca11c DW |
29 | |
# Exit status of this program; run_scrub() ORs each scrub's result into it.
retcode = 0
# Set by main() on KeyboardInterrupt so that no further scrubs are started.
terminate = False
32 | ||
824b5807 DW |
def DEVNULL():
    '''Return /dev/null in subprocess writable format.

    Returns subprocess.DEVNULL when the running Python provides it.
    Otherwise returns a binary file object opened for writing on
    os.devnull; that file is opened once and reused by later calls so
    that we do not leak one descriptor per invocation.
    '''
    try:
        # Imported under another name so that the module-level function
        # DEVNULL stays reachable for the cache below.
        from subprocess import DEVNULL as devnull
    except ImportError:
        devnull = getattr(DEVNULL, '_fallback', None)
        if devnull is None or devnull.closed:
            devnull = open(os.devnull, 'wb')
            DEVNULL._fallback = devnull
    return devnull
40 | ||
f1dca11c DW |
def find_mounts():
    '''Map mountpoints to physical disks.

    Runs "lsblk -J" and returns a dict that maps the mountpoint of each
    mounted XFS filesystem to the set of kernel names (KNAME) of the
    disks it was found on.  An empty dict is returned when lsblk cannot
    be started or exits with a nonzero status.
    '''
    def record(bdev, disk, fs):
        '''Attach disk to every mounted XFS filesystem at or below bdev.'''
        mnt = bdev.get('mountpoint')
        if bdev.get('fstype') == 'xfs' and mnt is not None:
            fs.setdefault(mnt, set()).add(disk)
        # If lsblk reported dependent devices nested under this one,
        # they live on the same disk.
        for child in bdev.get('children') or ():
            record(child, disk, fs)

    fs = {}
    cmd = ['lsblk', '-o', 'KNAME,TYPE,FSTYPE,MOUNTPOINT', '-J']
    try:
        result = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    except OSError as err:
        # Treat "cannot run lsblk" like "lsblk failed": nothing to scrub.
        print("Unable to run lsblk: %s" % err, file=sys.stderr)
        return fs

    # communicate() drains the pipe while waiting for lsblk to exit;
    # calling wait() first can deadlock once the pipe buffer fills up.
    output, _ = result.communicate()
    if result.returncode != 0:
        return fs
    bdevdata = json.loads(output.decode('utf-8'))

    # The lsblk output had better be in disks-then-partitions order
    lastdisk = None
    for bdev in bdevdata['blockdevices']:
        # A leading device that is neither a disk nor a loop device is
        # charged to itself instead of tripping over an unset lastdisk.
        if bdev['type'] in ('disk', 'loop') or lastdisk is None:
            lastdisk = bdev['kname']
        record(bdev, lastdisk, fs)
    return fs
66 | ||
824b5807 DW |
def kill_systemd(unit, proc):
    '''Kill systemd unit.

    Terminates proc, then runs "systemctl stop <unit>" and waits for
    that command to finish.
    '''
    proc.terminate()
    stopper = subprocess.Popen(['systemctl', 'stop', unit])
    stopper.wait()
73 | ||
f1dca11c DW |
def run_killable(cmd, stdout, killfuncs, kill_fn):
    '''Run a killable program.

    cmd is the argv list to execute and stdout is handed to
    subprocess.Popen unchanged.  While the program runs, a zero-argument
    callback that invokes kill_fn(proc) is kept in the killfuncs set so
    that another thread can stop the program.

    Returns program retcode or -1 if we can't start it.
    '''
    # Only a failure to launch is reported as -1; anything else is a
    # real error and must not be silently swallowed.
    try:
        proc = subprocess.Popen(cmd, stdout=stdout)
    except (OSError, ValueError, subprocess.SubprocessError):
        return -1

    def real_kill_fn():
        return kill_fn(proc)

    killfuncs.add(real_kill_fn)
    try:
        proc.wait()
    finally:
        # Whoever is killing everything may already have popped the
        # callback, so use discard() rather than remove().
        killfuncs.discard(real_kill_fn)
    return proc.returncode
88 | ||
def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
    '''Run a scrub process.

    Scrubs the filesystem mounted at mnt, first by starting the
    xfs_scrub@<mnt> systemd unit and, if that does not finish with
    status 0 or 1, by invoking xfs_scrub directly.  The result is ORed
    into the global retcode.  Nothing is started once the global
    terminate flag is set.

    On exit mntdevs is removed from the shared running_devs set and
    cond is notified, both while holding cond.  killfuncs is passed on
    to run_killable().
    '''
    def report(ret):
        '''Announce the end of the scrub and record its status.'''
        global retcode
        print("Scrubbing %s done, (err=%d)" % (mnt, ret))
        sys.stdout.flush()
        # Several scrubs run concurrently; serialize the update.
        with cond:
            retcode |= ret

    print("Scrubbing %s..." % mnt)
    sys.stdout.flush()

    try:
        if terminate:
            return

        # Try it the systemd way
        unit = 'xfs_scrub@%s' % mnt
        cmd = ['systemctl', 'start', unit]
        ret = run_killable(cmd, DEVNULL(), killfuncs,
                           lambda proc: kill_systemd(unit, proc))
        if ret in (0, 1):
            report(ret)
            return

        if terminate:
            return

        # Invoke xfs_scrub manually.  The configured arguments are one
        # string that may hold several options (or none), so split it
        # into separate argv entries.
        cmd = ['@sbindir@/xfs_scrub'] + '@scrub_args@'.split() + [mnt]
        ret = run_killable(cmd, None, killfuncs,
                           lambda proc: proc.terminate())
        if ret >= 0:
            report(ret)
            return

        if terminate:
            return

        # NOTE(review): a scrub that died from a signal also ends up here
        # and, like a failed start, does not touch retcode -- confirm
        # that this is intended.
        print("Unable to start scrub tool.")
        sys.stdout.flush()
    finally:
        # Release our devices and wake the scheduler under the lock.
        with cond:
            running_devs.difference_update(mntdevs)
            cond.notify()
133 | ||
def main():
    '''Find mounts, schedule scrub runs.

    Starts one run_scrub() thread per XFS mountpoint, never running two
    scrubs that share a disk at the same time, waits for all of them and
    exits with the accumulated retcode.  Unless SERVICE_MODE is set in
    the environment, the journal of the xfs_scrub@ units is tailed while
    the scrubs run.  Ctrl-C stops the running scrubs and prevents new
    ones from starting.
    '''
    global retcode, terminate

    workers = []

    def thr(mnt, devs):
        '''Start a scrub thread for mnt and remember it for joining.'''
        a = (mnt, cond, running_devs, devs, killfuncs)
        worker = threading.Thread(target=run_scrub, args=a)
        workers.append(worker)
        worker.start()

    fs = find_mounts()

    # Tail the journal if we ourselves aren't a service...
    journalthread = None
    if 'SERVICE_MODE' not in os.environ:
        try:
            cmd = ['journalctl', '--no-pager', '-q', '-S', 'now',
                   '-f', '-u', 'xfs_scrub@*', '-o',
                   'cat']
            journalthread = subprocess.Popen(cmd)
        except OSError:
            # Best effort only: carry on without the journal tail.
            pass

    # Schedule scrub jobs...
    running_devs = set()
    killfuncs = set()
    cond = threading.Condition()
    try:
        # Hold the lock from the scan until wait() releases it.  A scrub
        # needs the lock to notify, so it cannot finish unnoticed in
        # between and leave us waiting forever.
        with cond:
            while fs:
                if not running_devs:
                    mnt, devs = fs.popitem()
                    running_devs.update(devs)
                    thr(mnt, devs)
                poppers = set()
                for mnt, devs in fs.items():
                    if running_devs.isdisjoint(devs):
                        running_devs.update(devs)
                        poppers.add(mnt)
                        thr(mnt, devs)
                for p in poppers:
                    fs.pop(p)
                cond.wait()

        # Everything is scheduled; wait for the stragglers so that
        # retcode is complete before we report it.
        for worker in workers:
            worker.join()
    except KeyboardInterrupt:
        terminate = True
        print("Terminating...")
        sys.stdout.flush()
        while killfuncs:
            fn = killfuncs.pop()
            fn()
        for worker in workers:
            worker.join()

    if journalthread is not None:
        journalthread.terminate()
        # Reap the child so that it does not linger as a zombie.
        journalthread.wait()

    # See the service mode comments in xfs_scrub.c for why we do this.
    if 'SERVICE_MODE' in os.environ:
        time.sleep(2)
        if retcode != 0:
            retcode = 1

    sys.exit(retcode)
201 | ||
# Only schedule scrubs when run as a program, not when imported.
if __name__ == "__main__":
    main()