]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - tools/xfsbuflock.py
xfs: remote attribute blocks aren't really userdata
[thirdparty/xfsprogs-dev.git] / tools / xfsbuflock.py
CommitLineData
59d2274f
DW
1#!/usr/bin/env python3
2
3# Read ftrace input, looking for XFS buffer deadlocks.
4#
5# Copyright (C) 2016 Oracle. All Rights Reserved.
6#
7# Author: Darrick J. Wong <darrick.wong@oracle.com>
8#
9# This program is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License
11# as published by the Free Software Foundation; either version 2
12# of the License, or (at your option) any later version.
13#
14# This program is distributed in the hope that it would be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, write the Free Software Foundation,
21# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
22#
23# Rough guide to using this script:
24# Collect ftrace data from a deadlock:
25#
26# # trace-cmd record -e 'xfs_buf_*lock*' <other traces> &
27# <run command, hang system>^Z
28# # killall -INT trace-cmd
29# <wait for trace-cmd to spit out trace.dat>
30#
31# Now analyze the captured trace data:
32#
33# # trace-cmd report | xfsbuflock.py
34# === fsx-14956 ===
35# <trace data>
36# 3732.005575: xfs_buf_trylock_fail: dev 8:16 bno 0x1 nblks 0x1 hold 4 \
37# pincount 1 lock 0 flags DONE|KMEM caller 0xc009af36s
38# Locked buffers:
39# dev 8:16 bno 0x64c371 nblks 0x1 lock 1 owner fsx-14956@3732.005567
40# waiting: fsx-14954
41# dev 8:16 bno 0x64c380 nblks 0x8 lock 1 owner fsx-14956@3732.005571
42# dev 8:16 bno 0x64c378 nblks 0x8 lock 1 owner fsx-14956@3732.005570
43# === fsx-14954 ===
44# <trace data>
45# 3732.005592: xfs_buf_trylock_fail: dev 8:16 bno 0x64c371 nblks 0x1 hold 4 \
46# pincount 1 lock 0 flags ASYNC|DONE|KMEM caller 0xc009af36s
47# Locked buffers:
48# dev 8:16 bno 0x8 nblks 0x8 lock 1 owner fsx-14954@3732.005583
49# dev 8:16 bno 0x1 nblks 0x1 lock 1 owner fsx-14954@3732.005574
50# waiting: fsx-14956
51# waiting: fsx-14957
52# waiting: fsx-14958
53# dev 8:16 bno 0x10 nblks 0x8 lock 1 owner fsx-14954@3732.005585
54#
55# As you can see, fsx-14596 is locking AGFs in violation of the locking
56# order rules.
57
58import sys
59import fileinput
60from collections import namedtuple
61
62NR_BACKTRACE = 50
63
64class Process:
65 def __init__(self, pid):
66 self.pid = pid;
67 self.bufs = set()
68 self.locked_bufs = set()
69 self.backtrace = []
70
71 def dump(self):
72 print('=== %s ===' % self.pid)
73 for bt in self.backtrace:
74 print('%f: %s' % (bt.time, bt.descr))
75 print('Locked buffers:')
76 for buf in self.locked_bufs:
77 buf.dump()
78
79class Buffer:
80 def __init__(self, dev, bno, blen):
81 self.dev = dev
82 self.bno = int(bno, 0)
83 self.blen = int(blen, 0)
84 self.locked = False
85 self.locktime = None
86 self.owner = None
87 self.waiters = set()
88
89 def trylock(self, process, time):
90 self.lockdone(process, time)
91
92 def lockdone(self, process, time):
93 if self.locked:
94 print('Buffer already locked on line %d?!' % nr)
95 # process.dump()
96 # self.dump()
97 # assert False
98 if process in self.waiters:
99 self.waiters.remove(process)
100 self.locked = True
101 self.owner = process
102 self.locktime = time
103 process.locked_bufs.add(self)
104 process.bufs.add(self)
105 locked_buffers.add(self)
106
107 def waitlock(self, process):
108 self.waiters.add(process)
109
110 def unlock(self):
111 self.locked = False
112 if self in locked_buffers:
113 locked_buffers.remove(self)
114 if self.owner is not None and \
115 self in self.owner.locked_bufs:
116 self.owner.locked_bufs.remove(self)
117
118 def dump(self):
119 if self.owner is not None:
120 pid = '%s@%f' % (self.owner.pid, self.locktime)
121 else:
122 pid = ''
123 print('dev %s bno 0x%x nblks 0x%x lock %d owner %s' % \
124 (self.dev, self.bno, self.blen, self.locked, \
125 pid))
126 for proc in self.waiters:
127 print(' waiting: %s' % proc.pid)
128
129Event = namedtuple('Event', 'time descr')
130
131# Read ftrace input, looking for events and for buffer lock info
132processes = {}
133buffers = {}
134locked_buffers = set()
135
136def getbuf(toks):
137 if int(toks[7], 0) == 18446744073709551615:
138 return None
139 bufkey = ' '.join(toks[4:10])
140 if bufkey in buffers:
141 return buffers[bufkey]
142 buf = Buffer(toks[5], toks[7], toks[9])
143 buffers[bufkey] = buf
144 return buf
145
146nr = 0
147for line in fileinput.input():
148 nr += 1
149 toks = line.split()
150 if len(toks) < 4:
151 continue
152 pid = toks[0]
153 time = float(toks[2][:-1])
154 fn = toks[3][:-1]
155
156 if pid in processes:
157 proc = processes[pid]
158 else:
159 proc = Process(pid)
160 processes[pid] = proc
161
162 if fn == 'xfs_buf_unlock' or fn == 'xfs_buf_item_unlock_stale':
163 buf = getbuf(toks)
164 if buf is not None:
165 buf.unlock()
166 elif fn == 'xfs_buf_lock_done':
167 buf = getbuf(toks)
168 if buf is not None:
169 buf.lockdone(proc, time)
170 elif fn == 'xfs_buf_lock':
171 buf = getbuf(toks)
172 if buf is not None:
173 buf.waitlock(proc)
174 elif fn == 'xfs_buf_trylock':
175 buf = getbuf(toks)
176 if buf is not None:
177 buf.trylock(proc, time)
178 elif fn == 'xfs_buf_item_unlock':
179 pass
180 else:
181 e = Event(time, ' '.join(toks[3:]))
182 proc.backtrace.append(e)
183 if len(proc.backtrace) > NR_BACKTRACE:
184 proc.backtrace.pop(0)
185
186deadlocked = set()
187for buf in locked_buffers:
188 deadlocked.add(buf.owner)
189
190for proc in deadlocked:
191 proc.dump()
192
193sys.exit(0)
194
195for key in buffers:
196 buf = buffers[key]
197 if buf.locked:
198 print('dev %s bno 0x%x len 0x%x owner %s' % (buf.dev, buf.bno, buf.blen, buf.owner.pid))
199 else:
200 print('dev %s bno 0x%x len 0x%x' % (buf.dev, buf.bno, buf.blen))
201
202sys.exit(0)
203
204for pid in processes:
205 proc = processes[pid]