]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - tools/xfsbuflock.py
cc15f582c4efb87e01ae82b010beeea6e2ba2021
[thirdparty/xfsprogs-dev.git] / tools / xfsbuflock.py
1 #!/usr/bin/env python3
2
3 # Read ftrace input, looking for XFS buffer deadlocks.
4 #
5 # Copyright (C) 2016 Oracle. All Rights Reserved.
6 #
7 # Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # as published by the Free Software Foundation; either version 2
12 # of the License, or (at your option) any later version.
13 #
14 # This program is distributed in the hope that it would be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write the Free Software Foundation,
21 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
22 #
23 # Rough guide to using this script:
24 # Collect ftrace data from a deadlock:
25 #
26 # # trace-cmd record -e 'xfs_buf_*lock*' <other traces> &
27 # <run command, hang system>^Z
28 # # killall -INT trace-cmd
29 # <wait for trace-cmd to spit out trace.dat>
30 #
31 # Now analyze the captured trace data:
32 #
33 # # trace-cmd report | xfsbuflock.py
34 # === fsx-14956 ===
35 # <trace data>
36 # 3732.005575: xfs_buf_trylock_fail: dev 8:16 bno 0x1 nblks 0x1 hold 4 \
37 # pincount 1 lock 0 flags DONE|KMEM caller 0xc009af36s
38 # Locked buffers:
39 # dev 8:16 bno 0x64c371 nblks 0x1 lock 1 owner fsx-14956@3732.005567
40 # waiting: fsx-14954
41 # dev 8:16 bno 0x64c380 nblks 0x8 lock 1 owner fsx-14956@3732.005571
42 # dev 8:16 bno 0x64c378 nblks 0x8 lock 1 owner fsx-14956@3732.005570
43 # === fsx-14954 ===
44 # <trace data>
45 # 3732.005592: xfs_buf_trylock_fail: dev 8:16 bno 0x64c371 nblks 0x1 hold 4 \
46 # pincount 1 lock 0 flags ASYNC|DONE|KMEM caller 0xc009af36s
47 # Locked buffers:
48 # dev 8:16 bno 0x8 nblks 0x8 lock 1 owner fsx-14954@3732.005583
49 # dev 8:16 bno 0x1 nblks 0x1 lock 1 owner fsx-14954@3732.005574
50 # waiting: fsx-14956
51 # waiting: fsx-14957
52 # waiting: fsx-14958
53 # dev 8:16 bno 0x10 nblks 0x8 lock 1 owner fsx-14954@3732.005585
54 #
55 # As you can see, fsx-14956 is locking AGFs in violation of the locking
56 # order rules.
57
58 import sys
59 import fileinput
60 from collections import namedtuple
61
# Maximum number of non-lock trace events remembered per process; older
# entries are discarded once a process's backtrace grows past this length.
NR_BACKTRACE = 50
63
class Process:
    """Per-process view of the trace: recent events plus buffer locks held.

    Attributes:
        pid: process identifier exactly as it appears in the trace.
        bufs: every buffer this process has locked at some point.
        locked_bufs: buffers this process holds right now.
        backtrace: recent non-lock events, oldest first; each entry must
            expose ``time`` and ``descr`` attributes.
    """

    def __init__(self, pid):
        """Create a process record with no events and no locks."""
        self.pid = pid
        self.bufs = set()
        self.locked_bufs = set()
        self.backtrace = []

    def dump(self):
        """Print the recorded events followed by the buffers still held.

        Held buffers are listed in the order they were locked.  The set
        itself has no useful order, and acquisition order is what matters
        when looking for lock-ordering violations.
        """
        print('=== %s ===' % self.pid)
        for bt in self.backtrace:
            print('%f: %s' % (bt.time, bt.descr))
        print('Locked buffers:')
        for buf in sorted(self.locked_bufs, key=lambda held: held.locktime):
            buf.dump()
78
class Buffer:
    """Lock state of one buffer as reconstructed from the trace.

    The methods rely on two module-level names defined elsewhere in this
    script: ``locked_buffers`` (the set of all buffers currently held) and
    ``nr`` (the number of the input line being processed).
    """

    def __init__(self, dev, bno, blen):
        """Create an unlocked buffer with no owner and no waiters.

        dev  -- device identifier, stored verbatim
        bno  -- block number as text, parsed with int(bno, 0) so that a
                ``0x`` prefix selects hexadecimal
        blen -- length in blocks as text, parsed the same way

        Raises ValueError if bno or blen is not a valid integer literal.
        """
        self.dev = dev
        self.bno = int(bno, 0)
        self.blen = int(blen, 0)
        self.locked = False
        self.locktime = None
        self.owner = None
        self.waiters = set()

    def trylock(self, process, time):
        """Handle a trylock event; it only counts if the buffer was free."""
        if not self.locked:
            self.lockdone(process, time)

    def lockdone(self, process, time):
        """Record that *process* acquired this buffer's lock at *time*."""
        if self.locked:
            # Presumably an unlock event is missing from the trace; report
            # it and carry on with the new owner.
            print('Buffer already locked on line %d?!' % nr)
            previous = self.owner
            if previous is not None and previous is not process:
                # Without this the old owner would keep listing a buffer
                # that now belongs to somebody else.
                previous.locked_bufs.discard(self)
        self.waiters.discard(process)
        self.locked = True
        self.owner = process
        self.locktime = time
        process.locked_bufs.add(self)
        process.bufs.add(self)
        locked_buffers.add(self)

    def waitlock(self, process):
        """Record that *process* has started waiting for this buffer."""
        self.waiters.add(process)

    def unlock(self):
        """Mark the buffer free and drop it from the held-buffer sets.

        ``owner`` and ``locktime`` are left in place, so they keep
        describing the most recent holder.
        """
        self.locked = False
        locked_buffers.discard(self)
        if self.owner is not None:
            self.owner.locked_bufs.discard(self)

    def dump(self):
        """Print one line describing the buffer, then one line per waiter."""
        if self.owner is not None:
            pid = '%s@%f' % (self.owner.pid, self.locktime)
        else:
            pid = ''
        print('dev %s bno 0x%x nblks 0x%x lock %d owner %s' %
              (self.dev, self.bno, self.blen, self.locked, pid))
        # Sort so that repeated runs over the same trace print the same text.
        for proc in sorted(self.waiters, key=lambda waiter: waiter.pid):
            print(' waiting: %s' % proc.pid)
129
# One remembered trace line: its timestamp and the text that followed it.
Event = namedtuple('Event', 'time descr')

# Read ftrace input, looking for events and for buffer lock info
processes = {}              # pid string -> Process
buffers = {}                # buffer key text -> Buffer
locked_buffers = set()      # every Buffer that is currently locked
136
# Block number of buffers that getbuf() refuses to track (2**64 - 1).
# NOTE(review): presumably the kernel's "no disk address" marker -- confirm.
_NULL_BNO = 0xFFFFFFFFFFFFFFFF


def getbuf(toks):
    """Return the Buffer named by a split lock-event line, or None.

    toks -- the whitespace-split trace line.  Fields 4..9 are used as the
            buffer's identity; field 5 is the device, field 7 the block
            number and field 9 the length in blocks.

    Buffers are created on first sight and cached in the module-level
    ``buffers`` table, so one on-disk buffer always maps to one object.

    None is returned when the line is too short, when a number cannot be
    parsed, or when the block number is the all-ones value.  Callers already
    skip None, so a malformed line is ignored instead of aborting the run.
    """
    if len(toks) < 10:
        return None
    try:
        if int(toks[7], 0) == _NULL_BNO:
            return None
    except ValueError:
        return None

    bufkey = ' '.join(toks[4:10])
    buf = buffers.get(bufkey)
    if buf is None:
        try:
            buf = Buffer(toks[5], toks[7], toks[9])
        except ValueError:
            return None
        buffers[bufkey] = buf
    return buf
146
# Replay the trace one line at a time, updating the lock model as we go.
# ``nr`` must stay a module-level name: Buffer.lockdone() reports it.
nr = 0
for line in fileinput.input():
    nr += 1
    toks = line.split()
    if len(toks) < 4:
        continue
    pid = toks[0]
    try:
        # The timestamp token ends in ':' which has to be stripped first.
        time = float(toks[2][:-1])
    except ValueError:
        # Not an event line; skip it.
        continue
    # The event name carries a trailing ':' as well.
    fn = toks[3][:-1]

    proc = processes.get(pid)
    if proc is None:
        proc = Process(pid)
        processes[pid] = proc

    if fn in ('xfs_buf_unlock', 'xfs_buf_item_unlock_stale'):
        buf = getbuf(toks)
        if buf is not None:
            buf.unlock()
    elif fn == 'xfs_buf_lock_done':
        buf = getbuf(toks)
        if buf is not None:
            buf.lockdone(proc, time)
    elif fn == 'xfs_buf_lock':
        buf = getbuf(toks)
        if buf is not None:
            buf.waitlock(proc)
    elif fn == 'xfs_buf_trylock':
        buf = getbuf(toks)
        if buf is not None:
            buf.trylock(proc, time)
    elif fn == 'xfs_buf_item_unlock':
        # Deliberately ignored: neither a lock change nor kept as context.
        pass
    else:
        # Anything else is kept as context for the final report, capped at
        # the NR_BACKTRACE most recent events per process.
        proc.backtrace.append(Event(time, ' '.join(toks[3:])))
        if len(proc.backtrace) > NR_BACKTRACE:
            del proc.backtrace[0]
189
# Every process that still owns a locked buffer when the trace ends is
# reported.  Sorting by pid keeps the report stable from run to run.
deadlocked = {buf.owner for buf in locked_buffers}

for proc in sorted(deadlocked, key=lambda owner: owner.pid):
    proc.dump()

sys.exit(0)
198
# NOTE: unreachable as the file stands -- a sys.exit(0) earlier in the
# script ends the run before control gets here.  This looks like a debugging
# aid: it lists every buffer seen in the trace, with its owner if locked.
for buf in buffers.values():
    if buf.locked:
        print('dev %s bno 0x%x len 0x%x owner %s' %
              (buf.dev, buf.bno, buf.blen, buf.owner.pid))
    else:
        print('dev %s bno 0x%x len 0x%x' % (buf.dev, buf.bno, buf.blen))

sys.exit(0)
207
208 for pid in processes:
209 proc = processes[pid]