]>
Commit | Line | Data |
---|---|---|
d6ac1c7e MCC |
1 | =================================================== |
2 | Adding reference counters (krefs) to kernel objects | |
3 | =================================================== | |
4 | ||
5 | :Author: Corey Minyard <minyard@acm.org> | |
6 | :Author: Thomas Hellstrom <thellstrom@vmware.com> | |
7 | ||
8 | A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and | |
9 | presentation on krefs, which can be found at: | |
10 | ||
11 | - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf | |
12 | - http://www.kroah.com/linux/talks/ols_2004_kref_talk/ | |
13 | ||
14 | Introduction | |
15 | ============ | |
5c11c520 CM |
16 | |
17 | krefs allow you to add reference counters to your objects. If you | |
18 | have objects that are used in multiple places and passed around, and | |
19 | you don't have refcounts, your code is almost certainly broken. If | |
20 | you want refcounts, krefs are the way to go. | |
21 | ||
d6ac1c7e | 22 | To use a kref, add one to your data structures like:: |
5c11c520 | 23 | |
d6ac1c7e MCC |
24 | struct my_data |
25 | { | |
5c11c520 CM |
26 | . |
27 | . | |
28 | struct kref refcount; | |
29 | . | |
30 | . | |
d6ac1c7e | 31 | }; |
5c11c520 CM |
32 | |
33 | The kref can occur anywhere within the data structure. | |
34 | ||
d6ac1c7e MCC |
35 | Initialization |
36 | ============== | |
37 | ||
5c11c520 | 38 | You must initialize the kref after you allocate it. To do this, call |
d6ac1c7e | 39 | kref_init() as follows:: |
5c11c520 CM |
40 | |
41 | struct my_data *data; | |
42 | ||
43 | data = kmalloc(sizeof(*data), GFP_KERNEL); | |
44 | if (!data) | |
45 | return -ENOMEM; | |
46 | kref_init(&data->refcount); | |
47 | ||
48 | This sets the refcount in the kref to 1. | |
49 | ||
d6ac1c7e MCC |
50 | Kref rules |
51 | ========== | |
52 | ||
5c11c520 CM |
53 | Once you have an initialized kref, you must obey the following |
54 | rules: | |
55 | ||
56 | 1) If you make a non-temporary copy of a pointer, especially if | |
57 | it can be passed to another thread of execution, you must | |
d6ac1c7e MCC |
58 | increment the refcount with kref_get() before passing it off:: |
59 | ||
5c11c520 | 60 | kref_get(&data->refcount); |
d6ac1c7e | 61 | |
5c11c520 CM |
62 | If you already have a valid pointer to a kref-ed structure (the |
63 | refcount cannot go to zero) you may do this without a lock. | |
64 | ||
d6ac1c7e MCC |
65 | 2) When you are done with a pointer, you must call kref_put():: |
66 | ||
5c11c520 | 67 | kref_put(&data->refcount, data_release); |
d6ac1c7e | 68 | |
5c11c520 CM |
69 | If this is the last reference to the pointer, the release |
70 | routine will be called. If the code never tries to get | |
71 | a valid pointer to a kref-ed structure without already | |
72 | holding a valid pointer, it is safe to do this without | |
73 | a lock. | |
74 | ||
75 | 3) If the code attempts to gain a reference to a kref-ed structure | |
76 | without already holding a valid pointer, it must serialize access | |
77 | so that a kref_put() cannot occur during the kref_get(), and the | |
78 | structure must remain valid during the kref_get(). | |
79 | ||
80 | For example, if you allocate some data and then pass it to another | |
d6ac1c7e | 81 | thread to process:: |
5c11c520 | 82 | |
d6ac1c7e MCC |
83 | void data_release(struct kref *ref) |
84 | { | |
5c11c520 CM |
85 | struct my_data *data = container_of(ref, struct my_data, refcount); |
86 | kfree(data); | |
d6ac1c7e | 87 | } |
5c11c520 | 88 | |
d6ac1c7e MCC |
89 | void more_data_handling(void *cb_data) |
90 | { | |
5c11c520 CM |
91 | struct my_data *data = cb_data; |
92 | . | |
93 | . do stuff with data here | |
94 | . | |
b7cc4a87 | 95 | kref_put(&data->refcount, data_release); |
d6ac1c7e | 96 | } |
5c11c520 | 97 | |
d6ac1c7e MCC |
98 | int my_data_handler(void) |
99 | { | |
5c11c520 CM |
100 | int rv = 0; |
101 | struct my_data *data; | |
102 | struct task_struct *task; | |
103 | data = kmalloc(sizeof(*data), GFP_KERNEL); | |
104 | if (!data) | |
105 | return -ENOMEM; | |
106 | kref_init(&data->refcount); | |
107 | ||
108 | kref_get(&data->refcount); | |
109 | task = kthread_run(more_data_handling, data, "more_data_handling"); | |
110 | if (task == ERR_PTR(-ENOMEM)) { | |
111 | rv = -ENOMEM; | |
fd0f50db | 112 | kref_put(&data->refcount, data_release); |
5c11c520 CM |
113 | goto out; |
114 | } | |
115 | ||
116 | . | |
117 | . do stuff with data here | |
118 | . | |
d6ac1c7e | 119 | out: |
5c11c520 CM |
120 | kref_put(&data->refcount, data_release); |
121 | return rv; | |
d6ac1c7e | 122 | } |
5c11c520 CM |
123 | |
124 | This way, it doesn't matter in what order the two threads handle the | |
125 | data; the kref_put() handles knowing when the data is not referenced | |
126 | any more and releasing it. The kref_get() does not require a lock, | |
127 | since we already have a valid pointer that we own a refcount for. The | |
128 | put needs no lock because nothing tries to get the data without | |
129 | already holding a pointer. | |
130 | ||
ef45e78f MS |
131 | In the above example, kref_put() will be called 2 times in both success |
132 | and error paths. This is necessary because the reference count got | |
133 | incremented 2 times by kref_init() and kref_get(). | |
134 | ||
5c11c520 | 135 | Note that the "before" in rule 1 is very important. You should never |
d6ac1c7e | 136 | do something like:: |
5c11c520 CM |
137 | |
138 | task = kthread_run(more_data_handling, data, "more_data_handling"); | |
139 | if (task == ERR_PTR(-ENOMEM)) { | |
140 | rv = -ENOMEM; | |
141 | goto out; | |
142 | } else | |
143 | /* BAD BAD BAD - get is after the handoff */ | |
144 | kref_get(&data->refcount); | |
145 | ||
146 | Don't assume you know what you are doing and use the above construct. | |
147 | First of all, you may not know what you are doing. Second, you may | |
148 | know what you are doing (there are some situations where locking is | |
149 | involved where the above may be legal) but someone else who doesn't | |
150 | know what they are doing may change the code or copy the code. It's | |
151 | bad style. Don't do it. | |
152 | ||
153 | There are some situations where you can optimize the gets and puts. | |
154 | For instance, if you are done with an object and enqueuing it for | |
155 | something else or passing it off to something else, there is no reason | |
d6ac1c7e | 156 | to do a get then a put:: |
5c11c520 CM |
157 | |
158 | /* Silly extra get and put */ | |
159 | kref_get(&obj->ref); | |
160 | enqueue(obj); | |
161 | kref_put(&obj->ref, obj_cleanup); | |
162 | ||
d6ac1c7e | 163 | Just do the enqueue. A comment about this is always welcome:: |
5c11c520 CM |
164 | |
165 | enqueue(obj); | |
166 | /* We are done with obj, so we pass our refcount off | |
167 | to the queue. DON'T TOUCH obj AFTER HERE! */ | |
168 | ||
169 | The last rule (rule 3) is the nastiest one to handle. Say, for | |
170 | instance, you have a list of items that are each kref-ed, and you wish | |
171 | to get the first one. You can't just pull the first item off the list | |
172 | and kref_get() it. That violates rule 3 because you are not already | |
1373bed3 | 173 | holding a valid pointer. You must add a mutex (or some other lock). |
d6ac1c7e MCC |
174 | For instance:: |
175 | ||
176 | static DEFINE_MUTEX(mutex); | |
177 | static LIST_HEAD(q); | |
178 | struct my_data | |
179 | { | |
180 | struct kref refcount; | |
181 | struct list_head link; | |
182 | }; | |
183 | ||
184 | static struct my_data *get_entry() | |
185 | { | |
186 | struct my_data *entry = NULL; | |
187 | mutex_lock(&mutex); | |
188 | if (!list_empty(&q)) { | |
189 | entry = container_of(q.next, struct my_data, link); | |
190 | kref_get(&entry->refcount); | |
191 | } | |
192 | mutex_unlock(&mutex); | |
193 | return entry; | |
5c11c520 | 194 | } |
5c11c520 | 195 | |
d6ac1c7e MCC |
196 | static void release_entry(struct kref *ref) |
197 | { | |
198 | struct my_data *entry = container_of(ref, struct my_data, refcount); | |
5c11c520 | 199 | |
d6ac1c7e MCC |
200 | list_del(&entry->link); |
201 | kfree(entry); | |
202 | } | |
5c11c520 | 203 | |
d6ac1c7e MCC |
204 | static void put_entry(struct my_data *entry) |
205 | { | |
206 | mutex_lock(&mutex); | |
207 | kref_put(&entry->refcount, release_entry); | |
208 | mutex_unlock(&mutex); | |
209 | } | |
5c11c520 CM |
210 | |
211 | The kref_put() return value is useful if you do not want to hold the | |
212 | lock during the whole release operation. Say you didn't want to call | |
213 | kfree() with the lock held in the example above (since it is kind of | |
d6ac1c7e | 214 | pointless to do so). You could use kref_put() as follows:: |
5c11c520 | 215 | |
d6ac1c7e MCC |
216 | static void release_entry(struct kref *ref) |
217 | { | |
218 | /* All work is done after the return from kref_put(). */ | |
219 | } | |
5c11c520 | 220 | |
d6ac1c7e MCC |
221 | static void put_entry(struct my_data *entry) |
222 | { | |
223 | mutex_lock(&mutex); | |
224 | if (kref_put(&entry->refcount, release_entry)) { | |
225 | list_del(&entry->link); | |
226 | mutex_unlock(&mutex); | |
227 | kfree(entry); | |
228 | } else | |
229 | mutex_unlock(&mutex); | |
230 | } | |
5c11c520 CM |
231 | |
232 | This is really more useful if you have to call other routines as part | |
233 | of the free operations that could take a long time or might claim the | |
234 | same lock. Note that doing everything in the release routine is still | |
235 | preferred as it is a little neater. | |
236 | ||
a82b8db0 | 237 | The above example could also be optimized using kref_get_unless_zero() in |
d6ac1c7e MCC |
238 | the following way:: |
239 | ||
240 | static struct my_data *get_entry() | |
241 | { | |
242 | struct my_data *entry = NULL; | |
243 | mutex_lock(&mutex); | |
244 | if (!list_empty(&q)) { | |
245 | entry = container_of(q.next, struct my_data, link); | |
246 | if (!kref_get_unless_zero(&entry->refcount)) | |
247 | entry = NULL; | |
248 | } | |
249 | mutex_unlock(&mutex); | |
250 | return entry; | |
a82b8db0 | 251 | } |
a82b8db0 | 252 | |
d6ac1c7e MCC |
253 | static void release_entry(struct kref *ref) |
254 | { | |
255 | struct my_data *entry = container_of(ref, struct my_data, refcount); | |
a82b8db0 | 256 | |
d6ac1c7e MCC |
257 | mutex_lock(&mutex); |
258 | list_del(&entry->link); | |
259 | mutex_unlock(&mutex); | |
260 | kfree(entry); | |
261 | } | |
a82b8db0 | 262 | |
d6ac1c7e MCC |
263 | static void put_entry(struct my_data *entry) |
264 | { | |
265 | kref_put(&entry->refcount, release_entry); | |
266 | } | |
a82b8db0 TH |
267 | |
268 | This removes the need for the mutex lock around kref_put() in put_entry(), but | |
269 | it's important that kref_get_unless_zero() is enclosed in the same critical | |
270 | section that finds the entry in the lookup table; | |
271 | otherwise kref_get_unless_zero() may reference already freed memory. | |
272 | Note that it is illegal to use kref_get_unless_zero() without checking its | |
273 | return value. If you are sure (by already having a valid pointer) that | |
274 | kref_get_unless_zero() will return true, then use kref_get() instead. | |
275 | ||
d6ac1c7e MCC |
276 | Krefs and RCU |
277 | ============= | |
a82b8db0 | 278 | |
d6ac1c7e MCC |
279 | The function kref_get_unless_zero() also makes it possible to use RCU |
280 | locking for lookups in the above example:: | |
281 | ||
282 | struct my_data | |
283 | { | |
284 | struct rcu_head rhead; | |
285 | . | |
286 | struct kref refcount; | |
287 | . | |
288 | . | |
289 | }; | |
290 | ||
291 | static struct my_data *get_entry_rcu() | |
292 | { | |
293 | struct my_data *entry = NULL; | |
294 | rcu_read_lock(); | |
295 | if (!list_empty(&q)) { | |
296 | entry = container_of(q.next, struct my_data, link); | |
297 | if (!kref_get_unless_zero(&entry->refcount)) | |
298 | entry = NULL; | |
299 | } | |
300 | rcu_read_unlock(); | |
301 | return entry; | |
a82b8db0 | 302 | } |
a82b8db0 | 303 | |
d6ac1c7e MCC |
304 | static void release_entry_rcu(struct kref *ref) |
305 | { | |
306 | struct my_data *entry = container_of(ref, struct my_data, refcount); | |
a82b8db0 | 307 | |
d6ac1c7e MCC |
308 | mutex_lock(&mutex); |
309 | list_del_rcu(&entry->link); | |
310 | mutex_unlock(&mutex); | |
311 | kfree_rcu(entry, rhead); | |
312 | } | |
a82b8db0 | 313 | |
d6ac1c7e MCC |
314 | static void put_entry(struct my_data *entry) |
315 | { | |
316 | kref_put(&entry->refcount, release_entry_rcu); | |
317 | } | |
a82b8db0 TH |
318 | |
319 | But note that the struct kref member needs to remain in valid memory for an | |
320 | RCU grace period after release_entry_rcu() is called. That can be accomplished | |
321 | by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu() | |
322 | before using kfree(), but note that synchronize_rcu() may sleep for a | |
323 | substantial amount of time. |