]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - tools/xfsbuflock.py
xfs_buflock: record line number of trace where we locked the buffer
[thirdparty/xfsprogs-dev.git] / tools / xfsbuflock.py
CommitLineData
59d2274f
DW
1#!/usr/bin/env python3
2
3# Read ftrace input, looking for XFS buffer deadlocks.
4#
5# Copyright (C) 2016 Oracle. All Rights Reserved.
6#
7# Author: Darrick J. Wong <darrick.wong@oracle.com>
8#
9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License
11# as published by the Free Software Foundation; either version 2
12# of the License, or (at your option) any later version.
13#
14# This program is distributed in the hope that it would be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, write the Free Software Foundation,
21# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
22#
23# Rough guide to using this script:
24# Collect ftrace data from a deadlock:
25#
26# # trace-cmd record -e 'xfs_buf_*lock*' <other traces> &
27# <run command, hang system>^Z
28# # killall -INT trace-cmd
29# <wait for trace-cmd to spit out trace.dat>
30#
31# Now analyze the captured trace data:
32#
33# # trace-cmd report | xfsbuflock.py
34# === fsx-14956 ===
35# <trace data>
36# 3732.005575: xfs_buf_trylock_fail: dev 8:16 bno 0x1 nblks 0x1 hold 4 \
37# pincount 1 lock 0 flags DONE|KMEM caller 0xc009af36s
38# Locked buffers:
39# dev 8:16 bno 0x64c371 nblks 0x1 lock 1 owner fsx-14956@3732.005567
40# waiting: fsx-14954
41# dev 8:16 bno 0x64c380 nblks 0x8 lock 1 owner fsx-14956@3732.005571
42# dev 8:16 bno 0x64c378 nblks 0x8 lock 1 owner fsx-14956@3732.005570
43# === fsx-14954 ===
44# <trace data>
45# 3732.005592: xfs_buf_trylock_fail: dev 8:16 bno 0x64c371 nblks 0x1 hold 4 \
46# pincount 1 lock 0 flags ASYNC|DONE|KMEM caller 0xc009af36s
47# Locked buffers:
48# dev 8:16 bno 0x8 nblks 0x8 lock 1 owner fsx-14954@3732.005583
49# dev 8:16 bno 0x1 nblks 0x1 lock 1 owner fsx-14954@3732.005574
50# waiting: fsx-14956
51# waiting: fsx-14957
52# waiting: fsx-14958
53# dev 8:16 bno 0x10 nblks 0x8 lock 1 owner fsx-14954@3732.005585
54#
55# As you can see, fsx-14596 is locking AGFs in violation of the locking
56# order rules.
57
58import sys
59import fileinput
60from collections import namedtuple
61
62NR_BACKTRACE = 50
63
64class Process:
65 def __init__(self, pid):
66 self.pid = pid;
67 self.bufs = set()
68 self.locked_bufs = set()
69 self.backtrace = []
70
71 def dump(self):
72 print('=== %s ===' % self.pid)
73 for bt in self.backtrace:
74 print('%f: %s' % (bt.time, bt.descr))
75 print('Locked buffers:')
76 for buf in self.locked_bufs:
77 buf.dump()
78
79class Buffer:
80 def __init__(self, dev, bno, blen):
81 self.dev = dev
82 self.bno = int(bno, 0)
83 self.blen = int(blen, 0)
84 self.locked = False
85 self.locktime = None
86 self.owner = None
87 self.waiters = set()
ece9fa6e 88 self.lockline = 0
59d2274f
DW
89
90 def trylock(self, process, time):
c2b571f1
DW
91 if not self.locked:
92 self.lockdone(process, time)
59d2274f
DW
93
94 def lockdone(self, process, time):
95 if self.locked:
ece9fa6e
DW
96 print('Buffer 0x%x already locked at line %d? (line %d)' % \
97 (self.bno, self.lockline, nr))
59d2274f
DW
98 # process.dump()
99 # self.dump()
100 # assert False
101 if process in self.waiters:
102 self.waiters.remove(process)
103 self.locked = True
104 self.owner = process
105 self.locktime = time
ece9fa6e 106 self.lockline = nr
59d2274f
DW
107 process.locked_bufs.add(self)
108 process.bufs.add(self)
109 locked_buffers.add(self)
110
111 def waitlock(self, process):
112 self.waiters.add(process)
113
114 def unlock(self):
115 self.locked = False
116 if self in locked_buffers:
117 locked_buffers.remove(self)
118 if self.owner is not None and \
119 self in self.owner.locked_bufs:
120 self.owner.locked_bufs.remove(self)
121
122 def dump(self):
123 if self.owner is not None:
ece9fa6e
DW
124 pid = '%s@%f (line %d)' % \
125 (self.owner.pid, self.locktime, self.lockline)
59d2274f
DW
126 else:
127 pid = ''
128 print('dev %s bno 0x%x nblks 0x%x lock %d owner %s' % \
129 (self.dev, self.bno, self.blen, self.locked, \
130 pid))
131 for proc in self.waiters:
132 print(' waiting: %s' % proc.pid)
133
134Event = namedtuple('Event', 'time descr')
135
136# Read ftrace input, looking for events and for buffer lock info
137processes = {}
138buffers = {}
139locked_buffers = set()
140
141def getbuf(toks):
142 if int(toks[7], 0) == 18446744073709551615:
143 return None
144 bufkey = ' '.join(toks[4:10])
145 if bufkey in buffers:
146 return buffers[bufkey]
147 buf = Buffer(toks[5], toks[7], toks[9])
148 buffers[bufkey] = buf
149 return buf
150
151nr = 0
152for line in fileinput.input():
153 nr += 1
154 toks = line.split()
155 if len(toks) < 4:
156 continue
157 pid = toks[0]
f231685e
DW
158 try:
159 time = float(toks[2][:-1])
160 except:
161 continue
59d2274f
DW
162 fn = toks[3][:-1]
163
164 if pid in processes:
165 proc = processes[pid]
166 else:
167 proc = Process(pid)
168 processes[pid] = proc
169
170 if fn == 'xfs_buf_unlock' or fn == 'xfs_buf_item_unlock_stale':
171 buf = getbuf(toks)
172 if buf is not None:
173 buf.unlock()
174 elif fn == 'xfs_buf_lock_done':
175 buf = getbuf(toks)
176 if buf is not None:
177 buf.lockdone(proc, time)
178 elif fn == 'xfs_buf_lock':
179 buf = getbuf(toks)
180 if buf is not None:
181 buf.waitlock(proc)
182 elif fn == 'xfs_buf_trylock':
183 buf = getbuf(toks)
184 if buf is not None:
185 buf.trylock(proc, time)
186 elif fn == 'xfs_buf_item_unlock':
187 pass
188 else:
189 e = Event(time, ' '.join(toks[3:]))
190 proc.backtrace.append(e)
191 if len(proc.backtrace) > NR_BACKTRACE:
192 proc.backtrace.pop(0)
193
194deadlocked = set()
195for buf in locked_buffers:
196 deadlocked.add(buf.owner)
197
198for proc in deadlocked:
199 proc.dump()
200
201sys.exit(0)
202
203for key in buffers:
204 buf = buffers[key]
205 if buf.locked:
206 print('dev %s bno 0x%x len 0x%x owner %s' % (buf.dev, buf.bno, buf.blen, buf.owner.pid))
207 else:
208 print('dev %s bno 0x%x len 0x%x' % (buf.dev, buf.bno, buf.blen))
209
210sys.exit(0)
211
212for pid in processes:
213 proc = processes[pid]