]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - tools/xfsbuflock.py
xfs: fix maxicount division by zero error
[thirdparty/xfsprogs-dev.git] / tools / xfsbuflock.py
CommitLineData
59d2274f
DW
1#!/usr/bin/env python3
2
959ef981 3# SPDX-License-Identifier: GPL-2.0+
59d2274f
DW
4# Copyright (C) 2016 Oracle. All Rights Reserved.
5#
6# Author: Darrick J. Wong <darrick.wong@oracle.com>
959ef981
DC
7
8# Read ftrace input, looking for XFS buffer deadlocks.
59d2274f
DW
9#
10# Rough guide to using this script:
11# Collect ftrace data from a deadlock:
12#
13# # trace-cmd record -e 'xfs_buf_*lock*' <other traces> &
14# <run command, hang system>^Z
15# # killall -INT trace-cmd
16# <wait for trace-cmd to spit out trace.dat>
17#
18# Now analyze the captured trace data:
19#
20# # trace-cmd report | xfsbuflock.py
21# === fsx-14956 ===
22# <trace data>
23# 3732.005575: xfs_buf_trylock_fail: dev 8:16 bno 0x1 nblks 0x1 hold 4 \
24# pincount 1 lock 0 flags DONE|KMEM caller 0xc009af36s
25# Locked buffers:
26# dev 8:16 bno 0x64c371 nblks 0x1 lock 1 owner fsx-14956@3732.005567
27# waiting: fsx-14954
28# dev 8:16 bno 0x64c380 nblks 0x8 lock 1 owner fsx-14956@3732.005571
29# dev 8:16 bno 0x64c378 nblks 0x8 lock 1 owner fsx-14956@3732.005570
30# === fsx-14954 ===
31# <trace data>
32# 3732.005592: xfs_buf_trylock_fail: dev 8:16 bno 0x64c371 nblks 0x1 hold 4 \
33# pincount 1 lock 0 flags ASYNC|DONE|KMEM caller 0xc009af36s
34# Locked buffers:
35# dev 8:16 bno 0x8 nblks 0x8 lock 1 owner fsx-14954@3732.005583
36# dev 8:16 bno 0x1 nblks 0x1 lock 1 owner fsx-14954@3732.005574
37# waiting: fsx-14956
38# waiting: fsx-14957
39# waiting: fsx-14958
40# dev 8:16 bno 0x10 nblks 0x8 lock 1 owner fsx-14954@3732.005585
41#
42# As you can see, fsx-14596 is locking AGFs in violation of the locking
43# order rules.
44
45import sys
46import fileinput
47from collections import namedtuple
48
49NR_BACKTRACE = 50
50
51class Process:
52 def __init__(self, pid):
53 self.pid = pid;
54 self.bufs = set()
55 self.locked_bufs = set()
56 self.backtrace = []
57
58 def dump(self):
59 print('=== %s ===' % self.pid)
60 for bt in self.backtrace:
61 print('%f: %s' % (bt.time, bt.descr))
62 print('Locked buffers:')
63 for buf in self.locked_bufs:
64 buf.dump()
65
66class Buffer:
67 def __init__(self, dev, bno, blen):
68 self.dev = dev
69 self.bno = int(bno, 0)
70 self.blen = int(blen, 0)
71 self.locked = False
72 self.locktime = None
73 self.owner = None
74 self.waiters = set()
ece9fa6e 75 self.lockline = 0
59d2274f
DW
76
77 def trylock(self, process, time):
c2b571f1
DW
78 if not self.locked:
79 self.lockdone(process, time)
59d2274f 80
184798fd
DW
81 def init(self, process, time):
82 # Buffers are initialized locked, but we could be allocating
83 # a surplus buffer while trying to grab a buffer that may or
84 # may not already exist.
85 if not self.locked:
86 self.lockdone(process, time)
87
59d2274f
DW
88 def lockdone(self, process, time):
89 if self.locked:
ece9fa6e
DW
90 print('Buffer 0x%x already locked at line %d? (line %d)' % \
91 (self.bno, self.lockline, nr))
59d2274f
DW
92 # process.dump()
93 # self.dump()
94 # assert False
95 if process in self.waiters:
96 self.waiters.remove(process)
97 self.locked = True
98 self.owner = process
99 self.locktime = time
ece9fa6e 100 self.lockline = nr
59d2274f
DW
101 process.locked_bufs.add(self)
102 process.bufs.add(self)
103 locked_buffers.add(self)
104
105 def waitlock(self, process):
106 self.waiters.add(process)
107
108 def unlock(self):
109 self.locked = False
110 if self in locked_buffers:
111 locked_buffers.remove(self)
112 if self.owner is not None and \
113 self in self.owner.locked_bufs:
114 self.owner.locked_bufs.remove(self)
115
116 def dump(self):
117 if self.owner is not None:
ece9fa6e
DW
118 pid = '%s@%f (line %d)' % \
119 (self.owner.pid, self.locktime, self.lockline)
59d2274f
DW
120 else:
121 pid = ''
122 print('dev %s bno 0x%x nblks 0x%x lock %d owner %s' % \
123 (self.dev, self.bno, self.blen, self.locked, \
124 pid))
125 for proc in self.waiters:
126 print(' waiting: %s' % proc.pid)
127
128Event = namedtuple('Event', 'time descr')
129
130# Read ftrace input, looking for events and for buffer lock info
131processes = {}
132buffers = {}
133locked_buffers = set()
134
135def getbuf(toks):
136 if int(toks[7], 0) == 18446744073709551615:
137 return None
138 bufkey = ' '.join(toks[4:10])
139 if bufkey in buffers:
140 return buffers[bufkey]
141 buf = Buffer(toks[5], toks[7], toks[9])
142 buffers[bufkey] = buf
143 return buf
144
145nr = 0
146for line in fileinput.input():
147 nr += 1
148 toks = line.split()
149 if len(toks) < 4:
150 continue
151 pid = toks[0]
f231685e
DW
152 try:
153 time = float(toks[2][:-1])
154 except:
155 continue
59d2274f
DW
156 fn = toks[3][:-1]
157
158 if pid in processes:
159 proc = processes[pid]
160 else:
161 proc = Process(pid)
162 processes[pid] = proc
163
164 if fn == 'xfs_buf_unlock' or fn == 'xfs_buf_item_unlock_stale':
165 buf = getbuf(toks)
166 if buf is not None:
167 buf.unlock()
168 elif fn == 'xfs_buf_lock_done':
169 buf = getbuf(toks)
170 if buf is not None:
171 buf.lockdone(proc, time)
172 elif fn == 'xfs_buf_lock':
173 buf = getbuf(toks)
174 if buf is not None:
175 buf.waitlock(proc)
176 elif fn == 'xfs_buf_trylock':
177 buf = getbuf(toks)
178 if buf is not None:
179 buf.trylock(proc, time)
184798fd
DW
180 elif fn == 'xfs_buf_init':
181 buf = getbuf(toks)
182 if buf is not None:
183 buf.init(proc, time)
59d2274f
DW
184 elif fn == 'xfs_buf_item_unlock':
185 pass
186 else:
187 e = Event(time, ' '.join(toks[3:]))
188 proc.backtrace.append(e)
189 if len(proc.backtrace) > NR_BACKTRACE:
190 proc.backtrace.pop(0)
191
192deadlocked = set()
193for buf in locked_buffers:
194 deadlocked.add(buf.owner)
195
196for proc in deadlocked:
197 proc.dump()
198
199sys.exit(0)
200
201for key in buffers:
202 buf = buffers[key]
203 if buf.locked:
204 print('dev %s bno 0x%x len 0x%x owner %s' % (buf.dev, buf.bno, buf.blen, buf.owner.pid))
205 else:
206 print('dev %s bno 0x%x len 0x%x' % (buf.dev, buf.bno, buf.blen))
207
208sys.exit(0)
209
210for pid in processes:
211 proc = processes[pid]