]>
Commit | Line | Data |
---|---|---|
55716d26 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
b8441ed2 TH |
2 | /* |
3 | * fs/kernfs/file.c - kernfs file implementation | |
4 | * | |
5 | * Copyright (c) 2001-3 Patrick Mochel | |
6 | * Copyright (c) 2007 SUSE Linux Products GmbH | |
7 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> | |
b8441ed2 | 8 | */ |
414985ae TH |
9 | |
10 | #include <linux/fs.h> | |
11 | #include <linux/seq_file.h> | |
12 | #include <linux/slab.h> | |
13 | #include <linux/poll.h> | |
14 | #include <linux/pagemap.h> | |
589ee628 | 15 | #include <linux/sched/mm.h> |
d911d987 | 16 | #include <linux/fsnotify.h> |
4eaad21a | 17 | #include <linux/uio.h> |
414985ae TH |
18 | |
19 | #include "kernfs-internal.h" | |
20 | ||
c525aadd | 21 | struct kernfs_open_node { |
086c00c7 | 22 | struct rcu_head rcu_head; |
414985ae TH |
23 | atomic_t event; |
24 | wait_queue_head_t poll; | |
c525aadd | 25 | struct list_head files; /* goes through kernfs_open_file.list */ |
414985ae TH |
26 | }; |
27 | ||
/*
 * kernfs_notify() may be called from any context and bounces notifications
 * through a work item.  To minimize space overhead in kernfs_node, the
 * pending queue is a singly linked list of kernfs_nodes.
 *
 * The list is terminated with a non-NULL sentinel (the address of the list
 * head itself) rather than NULL, so that a NULL next pointer can be used to
 * tell that a kernfs_node is not on the list.
 */
#define KERNFS_NOTIFY_EOL			((void *)&kernfs_notify_list)

static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
ecca47ce | 40 | |
41448c61 IK |
41 | static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) |
42 | { | |
1d25b84e IK |
43 | int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); |
44 | ||
45 | return &kernfs_locks->open_file_mutex[idx]; | |
41448c61 IK |
46 | } |
47 | ||
/*
 * Acquire the hashed open-file mutex covering @kn and hand it back so that
 * the caller can pass it to mutex_unlock() when done.
 */
static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
{
	struct mutex *m = kernfs_open_file_mutex_ptr(kn);

	mutex_lock(m);
	return m;
}
58 | ||
086c00c7 IK |
59 | /** |
60 | * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn. | |
61 | * | |
62 | * @of: associated kernfs_open_file instance. | |
63 | * @kn: target kernfs_node. | |
64 | * | |
65 | * Fetch and return ->attr.open of @kn if @of->list is non empty. | |
66 | * If @of->list is not empty we can safely assume that @of is on | |
67 | * @kn->attr.open->files list and this guarantees that @kn->attr.open | |
68 | * will not vanish i.e. dereferencing outside RCU read-side critical | |
69 | * section is safe here. | |
70 | * | |
71 | * The caller needs to make sure that @of->list is not empty. | |
72 | */ | |
73 | static struct kernfs_open_node * | |
74 | kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn) | |
75 | { | |
76 | struct kernfs_open_node *on; | |
77 | ||
78 | on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list)); | |
79 | ||
80 | return on; | |
81 | } | |
82 | ||
83 | /** | |
84 | * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn | |
85 | * | |
86 | * @kn: target kernfs_node. | |
87 | * | |
88 | * Fetch and return ->attr.open of @kn when caller holds the | |
41448c61 | 89 | * kernfs_open_file_mutex_ptr(kn). |
086c00c7 | 90 | * |
41448c61 | 91 | * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when |
086c00c7 IK |
92 | * the caller guarantees that this mutex is being held, other updaters can't |
93 | * change ->attr.open and this means that we can safely deref ->attr.open | |
94 | * outside RCU read-side critical section. | |
95 | * | |
96 | * The caller needs to make sure that kernfs_open_file_mutex is held. | |
97 | */ | |
98 | static struct kernfs_open_node * | |
99 | kernfs_deref_open_node_protected(struct kernfs_node *kn) | |
100 | { | |
101 | return rcu_dereference_protected(kn->attr.open, | |
41448c61 | 102 | lockdep_is_held(kernfs_open_file_mutex_ptr(kn))); |
086c00c7 IK |
103 | } |
104 | ||
c525aadd | 105 | static struct kernfs_open_file *kernfs_of(struct file *file) |
414985ae TH |
106 | { |
107 | return ((struct seq_file *)file->private_data)->private; | |
108 | } | |
109 | ||
110 | /* | |
324a56e1 | 111 | * Determine the kernfs_ops for the given kernfs_node. This function must |
414985ae TH |
112 | * be called while holding an active reference. |
113 | */ | |
324a56e1 | 114 | static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) |
414985ae | 115 | { |
df23fc39 | 116 | if (kn->flags & KERNFS_LOCKDEP) |
324a56e1 | 117 | lockdep_assert_held(kn); |
adc5e8b5 | 118 | return kn->attr.ops; |
414985ae TH |
119 | } |
120 | ||
bb305947 TH |
121 | /* |
122 | * As kernfs_seq_stop() is also called after kernfs_seq_start() or | |
123 | * kernfs_seq_next() failure, it needs to distinguish whether it's stopping | |
124 | * a seq_file iteration which is fully initialized with an active reference | |
125 | * or an aborted kernfs_seq_start() due to get_active failure. The | |
126 | * position pointer is the only context for each seq_file iteration and | |
127 | * thus the stop condition should be encoded in it. As the return value is | |
128 | * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable | |
129 | * choice to indicate get_active failure. | |
130 | * | |
131 | * Unfortunately, this is complicated due to the optional custom seq_file | |
132 | * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() | |
133 | * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or | |
134 | * custom seq_file operations and thus can't decide whether put_active | |
135 | * should be performed or not only on ERR_PTR(-ENODEV). | |
136 | * | |
137 | * This is worked around by factoring out the custom seq_stop() and | |
138 | * put_active part into kernfs_seq_stop_active(), skipping it from | |
139 | * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after | |
140 | * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures | |
141 | * that kernfs_seq_stop_active() is skipped only after get_active failure. | |
142 | */ | |
143 | static void kernfs_seq_stop_active(struct seq_file *sf, void *v) | |
144 | { | |
145 | struct kernfs_open_file *of = sf->private; | |
146 | const struct kernfs_ops *ops = kernfs_ops(of->kn); | |
147 | ||
148 | if (ops->seq_stop) | |
149 | ops->seq_stop(sf, v); | |
150 | kernfs_put_active(of->kn); | |
151 | } | |
152 | ||
414985ae TH |
153 | static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) |
154 | { | |
c525aadd | 155 | struct kernfs_open_file *of = sf->private; |
414985ae TH |
156 | const struct kernfs_ops *ops; |
157 | ||
158 | /* | |
2b75869b | 159 | * @of->mutex nests outside active ref and is primarily to ensure that |
414985ae TH |
160 | * the ops aren't called concurrently for the same open file. |
161 | */ | |
162 | mutex_lock(&of->mutex); | |
c637b8ac | 163 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
164 | return ERR_PTR(-ENODEV); |
165 | ||
324a56e1 | 166 | ops = kernfs_ops(of->kn); |
414985ae | 167 | if (ops->seq_start) { |
bb305947 TH |
168 | void *next = ops->seq_start(sf, ppos); |
169 | /* see the comment above kernfs_seq_stop_active() */ | |
170 | if (next == ERR_PTR(-ENODEV)) | |
171 | kernfs_seq_stop_active(sf, next); | |
172 | return next; | |
414985ae | 173 | } |
90b2433e | 174 | return single_start(sf, ppos); |
414985ae TH |
175 | } |
176 | ||
177 | static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) | |
178 | { | |
c525aadd | 179 | struct kernfs_open_file *of = sf->private; |
324a56e1 | 180 | const struct kernfs_ops *ops = kernfs_ops(of->kn); |
414985ae TH |
181 | |
182 | if (ops->seq_next) { | |
bb305947 TH |
183 | void *next = ops->seq_next(sf, v, ppos); |
184 | /* see the comment above kernfs_seq_stop_active() */ | |
185 | if (next == ERR_PTR(-ENODEV)) | |
186 | kernfs_seq_stop_active(sf, next); | |
187 | return next; | |
414985ae TH |
188 | } else { |
189 | /* | |
190 | * The same behavior and code as single_open(), always | |
191 | * terminate after the initial read. | |
192 | */ | |
193 | ++*ppos; | |
194 | return NULL; | |
195 | } | |
196 | } | |
197 | ||
198 | static void kernfs_seq_stop(struct seq_file *sf, void *v) | |
199 | { | |
c525aadd | 200 | struct kernfs_open_file *of = sf->private; |
414985ae | 201 | |
bb305947 TH |
202 | if (v != ERR_PTR(-ENODEV)) |
203 | kernfs_seq_stop_active(sf, v); | |
414985ae TH |
204 | mutex_unlock(&of->mutex); |
205 | } | |
206 | ||
207 | static int kernfs_seq_show(struct seq_file *sf, void *v) | |
208 | { | |
c525aadd | 209 | struct kernfs_open_file *of = sf->private; |
086c00c7 | 210 | struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn); |
414985ae | 211 | |
086c00c7 IK |
212 | if (!on) |
213 | return -EINVAL; | |
214 | ||
215 | of->event = atomic_read(&on->event); | |
414985ae | 216 | |
adc5e8b5 | 217 | return of->kn->attr.ops->seq_show(sf, v); |
414985ae TH |
218 | } |
219 | ||
220 | static const struct seq_operations kernfs_seq_ops = { | |
221 | .start = kernfs_seq_start, | |
222 | .next = kernfs_seq_next, | |
223 | .stop = kernfs_seq_stop, | |
224 | .show = kernfs_seq_show, | |
225 | }; | |
226 | ||
227 | /* | |
228 | * As reading a bin file can have side-effects, the exact offset and bytes | |
229 | * specified in read(2) call should be passed to the read callback making | |
230 | * it difficult to use seq_file. Implement simplistic custom buffering for | |
231 | * bin files. | |
232 | */ | |
4eaad21a | 233 | static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
414985ae | 234 | { |
4eaad21a CH |
235 | struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); |
236 | ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); | |
414985ae | 237 | const struct kernfs_ops *ops; |
086c00c7 | 238 | struct kernfs_open_node *on; |
414985ae TH |
239 | char *buf; |
240 | ||
4ef67a8c | 241 | buf = of->prealloc_buf; |
e4234a1f CW |
242 | if (buf) |
243 | mutex_lock(&of->prealloc_mutex); | |
244 | else | |
4ef67a8c | 245 | buf = kmalloc(len, GFP_KERNEL); |
414985ae TH |
246 | if (!buf) |
247 | return -ENOMEM; | |
248 | ||
249 | /* | |
4ef67a8c | 250 | * @of->mutex nests outside active ref and is used both to ensure that |
e4234a1f | 251 | * the ops aren't called concurrently for the same open file. |
414985ae TH |
252 | */ |
253 | mutex_lock(&of->mutex); | |
c637b8ac | 254 | if (!kernfs_get_active(of->kn)) { |
414985ae TH |
255 | len = -ENODEV; |
256 | mutex_unlock(&of->mutex); | |
257 | goto out_free; | |
258 | } | |
259 | ||
086c00c7 IK |
260 | on = kernfs_deref_open_node(of, of->kn); |
261 | if (!on) { | |
262 | len = -EINVAL; | |
263 | mutex_unlock(&of->mutex); | |
264 | goto out_free; | |
265 | } | |
266 | ||
267 | of->event = atomic_read(&on->event); | |
268 | ||
324a56e1 | 269 | ops = kernfs_ops(of->kn); |
414985ae | 270 | if (ops->read) |
4eaad21a | 271 | len = ops->read(of, buf, len, iocb->ki_pos); |
414985ae TH |
272 | else |
273 | len = -EINVAL; | |
274 | ||
e4234a1f CW |
275 | kernfs_put_active(of->kn); |
276 | mutex_unlock(&of->mutex); | |
277 | ||
414985ae | 278 | if (len < 0) |
e4234a1f | 279 | goto out_free; |
414985ae | 280 | |
4eaad21a | 281 | if (copy_to_iter(buf, len, iter) != len) { |
414985ae | 282 | len = -EFAULT; |
e4234a1f | 283 | goto out_free; |
414985ae TH |
284 | } |
285 | ||
4eaad21a | 286 | iocb->ki_pos += len; |
414985ae TH |
287 | |
288 | out_free: | |
e4234a1f CW |
289 | if (buf == of->prealloc_buf) |
290 | mutex_unlock(&of->prealloc_mutex); | |
291 | else | |
4ef67a8c | 292 | kfree(buf); |
414985ae TH |
293 | return len; |
294 | } | |
295 | ||
4eaad21a | 296 | static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
414985ae | 297 | { |
4eaad21a CH |
298 | if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) |
299 | return seq_read_iter(iocb, iter); | |
300 | return kernfs_file_read_iter(iocb, iter); | |
414985ae TH |
301 | } |
302 | ||
cc099e0b | 303 | /* |
414985ae TH |
304 | * Copy data in from userland and pass it to the matching kernfs write |
305 | * operation. | |
306 | * | |
307 | * There is no easy way for us to know if userspace is only doing a partial | |
308 | * write, so we don't support them. We expect the entire buffer to come on | |
309 | * the first write. Hint: if you're writing a value, first read the file, | |
3fe40764 | 310 | * modify only the value you're changing, then write entire buffer |
414985ae TH |
311 | * back. |
312 | */ | |
cc099e0b | 313 | static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) |
414985ae | 314 | { |
cc099e0b CH |
315 | struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); |
316 | ssize_t len = iov_iter_count(iter); | |
414985ae | 317 | const struct kernfs_ops *ops; |
b7ce40cf | 318 | char *buf; |
4d3773c4 | 319 | |
b7ce40cf | 320 | if (of->atomic_write_len) { |
b7ce40cf TH |
321 | if (len > of->atomic_write_len) |
322 | return -E2BIG; | |
4d3773c4 | 323 | } else { |
cc099e0b | 324 | len = min_t(size_t, len, PAGE_SIZE); |
4d3773c4 TH |
325 | } |
326 | ||
2b75869b | 327 | buf = of->prealloc_buf; |
e4234a1f CW |
328 | if (buf) |
329 | mutex_lock(&of->prealloc_mutex); | |
330 | else | |
2b75869b | 331 | buf = kmalloc(len + 1, GFP_KERNEL); |
b7ce40cf TH |
332 | if (!buf) |
333 | return -ENOMEM; | |
414985ae | 334 | |
cc099e0b | 335 | if (copy_from_iter(buf, len, iter) != len) { |
e4234a1f CW |
336 | len = -EFAULT; |
337 | goto out_free; | |
338 | } | |
339 | buf[len] = '\0'; /* guarantee string termination */ | |
340 | ||
b7ce40cf | 341 | /* |
2b75869b | 342 | * @of->mutex nests outside active ref and is used both to ensure that |
e4234a1f | 343 | * the ops aren't called concurrently for the same open file. |
b7ce40cf TH |
344 | */ |
345 | mutex_lock(&of->mutex); | |
346 | if (!kernfs_get_active(of->kn)) { | |
347 | mutex_unlock(&of->mutex); | |
348 | len = -ENODEV; | |
349 | goto out_free; | |
350 | } | |
351 | ||
352 | ops = kernfs_ops(of->kn); | |
353 | if (ops->write) | |
cc099e0b | 354 | len = ops->write(of, buf, len, iocb->ki_pos); |
b7ce40cf TH |
355 | else |
356 | len = -EINVAL; | |
357 | ||
e4234a1f CW |
358 | kernfs_put_active(of->kn); |
359 | mutex_unlock(&of->mutex); | |
360 | ||
414985ae | 361 | if (len > 0) |
cc099e0b | 362 | iocb->ki_pos += len; |
2b75869b | 363 | |
b7ce40cf | 364 | out_free: |
e4234a1f CW |
365 | if (buf == of->prealloc_buf) |
366 | mutex_unlock(&of->prealloc_mutex); | |
367 | else | |
2b75869b | 368 | kfree(buf); |
414985ae TH |
369 | return len; |
370 | } | |
371 | ||
372 | static void kernfs_vma_open(struct vm_area_struct *vma) | |
373 | { | |
374 | struct file *file = vma->vm_file; | |
c525aadd | 375 | struct kernfs_open_file *of = kernfs_of(file); |
414985ae TH |
376 | |
377 | if (!of->vm_ops) | |
378 | return; | |
379 | ||
c637b8ac | 380 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
381 | return; |
382 | ||
383 | if (of->vm_ops->open) | |
384 | of->vm_ops->open(vma); | |
385 | ||
c637b8ac | 386 | kernfs_put_active(of->kn); |
414985ae TH |
387 | } |
388 | ||
9ee84466 | 389 | static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) |
414985ae | 390 | { |
11bac800 | 391 | struct file *file = vmf->vma->vm_file; |
c525aadd | 392 | struct kernfs_open_file *of = kernfs_of(file); |
9ee84466 | 393 | vm_fault_t ret; |
414985ae TH |
394 | |
395 | if (!of->vm_ops) | |
396 | return VM_FAULT_SIGBUS; | |
397 | ||
c637b8ac | 398 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
399 | return VM_FAULT_SIGBUS; |
400 | ||
401 | ret = VM_FAULT_SIGBUS; | |
402 | if (of->vm_ops->fault) | |
11bac800 | 403 | ret = of->vm_ops->fault(vmf); |
414985ae | 404 | |
c637b8ac | 405 | kernfs_put_active(of->kn); |
414985ae TH |
406 | return ret; |
407 | } | |
408 | ||
9ee84466 | 409 | static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) |
414985ae | 410 | { |
11bac800 | 411 | struct file *file = vmf->vma->vm_file; |
c525aadd | 412 | struct kernfs_open_file *of = kernfs_of(file); |
9ee84466 | 413 | vm_fault_t ret; |
414985ae TH |
414 | |
415 | if (!of->vm_ops) | |
416 | return VM_FAULT_SIGBUS; | |
417 | ||
c637b8ac | 418 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
419 | return VM_FAULT_SIGBUS; |
420 | ||
421 | ret = 0; | |
422 | if (of->vm_ops->page_mkwrite) | |
11bac800 | 423 | ret = of->vm_ops->page_mkwrite(vmf); |
414985ae TH |
424 | else |
425 | file_update_time(file); | |
426 | ||
c637b8ac | 427 | kernfs_put_active(of->kn); |
414985ae TH |
428 | return ret; |
429 | } | |
430 | ||
431 | static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, | |
432 | void *buf, int len, int write) | |
433 | { | |
434 | struct file *file = vma->vm_file; | |
c525aadd | 435 | struct kernfs_open_file *of = kernfs_of(file); |
414985ae TH |
436 | int ret; |
437 | ||
438 | if (!of->vm_ops) | |
439 | return -EINVAL; | |
440 | ||
c637b8ac | 441 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
442 | return -EINVAL; |
443 | ||
444 | ret = -EINVAL; | |
445 | if (of->vm_ops->access) | |
446 | ret = of->vm_ops->access(vma, addr, buf, len, write); | |
447 | ||
c637b8ac | 448 | kernfs_put_active(of->kn); |
414985ae TH |
449 | return ret; |
450 | } | |
451 | ||
452 | #ifdef CONFIG_NUMA | |
453 | static int kernfs_vma_set_policy(struct vm_area_struct *vma, | |
454 | struct mempolicy *new) | |
455 | { | |
456 | struct file *file = vma->vm_file; | |
c525aadd | 457 | struct kernfs_open_file *of = kernfs_of(file); |
414985ae TH |
458 | int ret; |
459 | ||
460 | if (!of->vm_ops) | |
461 | return 0; | |
462 | ||
c637b8ac | 463 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
464 | return -EINVAL; |
465 | ||
466 | ret = 0; | |
467 | if (of->vm_ops->set_policy) | |
468 | ret = of->vm_ops->set_policy(vma, new); | |
469 | ||
c637b8ac | 470 | kernfs_put_active(of->kn); |
414985ae TH |
471 | return ret; |
472 | } | |
473 | ||
474 | static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, | |
475 | unsigned long addr) | |
476 | { | |
477 | struct file *file = vma->vm_file; | |
c525aadd | 478 | struct kernfs_open_file *of = kernfs_of(file); |
414985ae TH |
479 | struct mempolicy *pol; |
480 | ||
481 | if (!of->vm_ops) | |
482 | return vma->vm_policy; | |
483 | ||
c637b8ac | 484 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
485 | return vma->vm_policy; |
486 | ||
487 | pol = vma->vm_policy; | |
488 | if (of->vm_ops->get_policy) | |
489 | pol = of->vm_ops->get_policy(vma, addr); | |
490 | ||
c637b8ac | 491 | kernfs_put_active(of->kn); |
414985ae TH |
492 | return pol; |
493 | } | |
494 | ||
414985ae TH |
495 | #endif |
496 | ||
497 | static const struct vm_operations_struct kernfs_vm_ops = { | |
498 | .open = kernfs_vma_open, | |
499 | .fault = kernfs_vma_fault, | |
500 | .page_mkwrite = kernfs_vma_page_mkwrite, | |
501 | .access = kernfs_vma_access, | |
502 | #ifdef CONFIG_NUMA | |
503 | .set_policy = kernfs_vma_set_policy, | |
504 | .get_policy = kernfs_vma_get_policy, | |
414985ae TH |
505 | #endif |
506 | }; | |
507 | ||
c637b8ac | 508 | static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) |
414985ae | 509 | { |
c525aadd | 510 | struct kernfs_open_file *of = kernfs_of(file); |
414985ae TH |
511 | const struct kernfs_ops *ops; |
512 | int rc; | |
513 | ||
9b2db6e1 TH |
514 | /* |
515 | * mmap path and of->mutex are prone to triggering spurious lockdep | |
516 | * warnings and we don't want to add spurious locking dependency | |
517 | * between the two. Check whether mmap is actually implemented | |
518 | * without grabbing @of->mutex by testing HAS_MMAP flag. See the | |
519 | * comment in kernfs_file_open() for more details. | |
520 | */ | |
df23fc39 | 521 | if (!(of->kn->flags & KERNFS_HAS_MMAP)) |
9b2db6e1 TH |
522 | return -ENODEV; |
523 | ||
414985ae TH |
524 | mutex_lock(&of->mutex); |
525 | ||
526 | rc = -ENODEV; | |
c637b8ac | 527 | if (!kernfs_get_active(of->kn)) |
414985ae TH |
528 | goto out_unlock; |
529 | ||
324a56e1 | 530 | ops = kernfs_ops(of->kn); |
9b2db6e1 | 531 | rc = ops->mmap(of, vma); |
b44b2140 TH |
532 | if (rc) |
533 | goto out_put; | |
414985ae TH |
534 | |
535 | /* | |
536 | * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() | |
537 | * to satisfy versions of X which crash if the mmap fails: that | |
538 | * substitutes a new vm_file, and we don't then want bin_vm_ops. | |
539 | */ | |
540 | if (vma->vm_file != file) | |
541 | goto out_put; | |
542 | ||
543 | rc = -EINVAL; | |
544 | if (of->mmapped && of->vm_ops != vma->vm_ops) | |
545 | goto out_put; | |
546 | ||
547 | /* | |
548 | * It is not possible to successfully wrap close. | |
549 | * So error if someone is trying to use close. | |
550 | */ | |
414985ae TH |
551 | if (vma->vm_ops && vma->vm_ops->close) |
552 | goto out_put; | |
553 | ||
554 | rc = 0; | |
a1d82aff | 555 | of->mmapped = true; |
414985ae TH |
556 | of->vm_ops = vma->vm_ops; |
557 | vma->vm_ops = &kernfs_vm_ops; | |
558 | out_put: | |
c637b8ac | 559 | kernfs_put_active(of->kn); |
414985ae TH |
560 | out_unlock: |
561 | mutex_unlock(&of->mutex); | |
562 | ||
563 | return rc; | |
564 | } | |
565 | ||
566 | /** | |
c637b8ac | 567 | * kernfs_get_open_node - get or create kernfs_open_node |
324a56e1 | 568 | * @kn: target kernfs_node |
c525aadd | 569 | * @of: kernfs_open_file for this instance of open |
414985ae | 570 | * |
adc5e8b5 TH |
571 | * If @kn->attr.open exists, increment its reference count; otherwise, |
572 | * create one. @of is chained to the files list. | |
414985ae TH |
573 | * |
574 | * LOCKING: | |
575 | * Kernel thread context (may sleep). | |
576 | * | |
577 | * RETURNS: | |
578 | * 0 on success, -errno on failure. | |
579 | */ | |
c637b8ac TH |
580 | static int kernfs_get_open_node(struct kernfs_node *kn, |
581 | struct kernfs_open_file *of) | |
414985ae | 582 | { |
c525aadd | 583 | struct kernfs_open_node *on, *new_on = NULL; |
41448c61 | 584 | struct mutex *mutex = NULL; |
414985ae | 585 | |
41448c61 | 586 | mutex = kernfs_open_file_mutex_lock(kn); |
086c00c7 | 587 | on = kernfs_deref_open_node_protected(kn); |
414985ae | 588 | |
c525aadd | 589 | if (on) { |
086c00c7 | 590 | list_add_tail(&of->list, &on->files); |
41448c61 | 591 | mutex_unlock(mutex); |
414985ae | 592 | return 0; |
086c00c7 IK |
593 | } else { |
594 | /* not there, initialize a new one */ | |
595 | new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); | |
596 | if (!new_on) { | |
41448c61 | 597 | mutex_unlock(mutex); |
086c00c7 IK |
598 | return -ENOMEM; |
599 | } | |
600 | atomic_set(&new_on->event, 1); | |
601 | init_waitqueue_head(&new_on->poll); | |
602 | INIT_LIST_HEAD(&new_on->files); | |
603 | list_add_tail(&of->list, &new_on->files); | |
604 | rcu_assign_pointer(kn->attr.open, new_on); | |
414985ae | 605 | } |
41448c61 | 606 | mutex_unlock(mutex); |
414985ae | 607 | |
086c00c7 | 608 | return 0; |
414985ae TH |
609 | } |
610 | ||
611 | /** | |
c1b1352f IK |
612 | * kernfs_unlink_open_file - Unlink @of from @kn. |
613 | * | |
bd900901 | 614 | * @kn: target kernfs_node |
c525aadd | 615 | * @of: associated kernfs_open_file |
414985ae | 616 | * |
c1b1352f IK |
617 | * Unlink @of from list of @kn's associated open files. If list of |
618 | * associated open files becomes empty, disassociate and free | |
619 | * kernfs_open_node. | |
414985ae TH |
620 | * |
621 | * LOCKING: | |
622 | * None. | |
623 | */ | |
c1b1352f | 624 | static void kernfs_unlink_open_file(struct kernfs_node *kn, |
c637b8ac | 625 | struct kernfs_open_file *of) |
414985ae | 626 | { |
086c00c7 | 627 | struct kernfs_open_node *on; |
41448c61 | 628 | struct mutex *mutex = NULL; |
414985ae | 629 | |
41448c61 | 630 | mutex = kernfs_open_file_mutex_lock(kn); |
086c00c7 IK |
631 | |
632 | on = kernfs_deref_open_node_protected(kn); | |
633 | if (!on) { | |
41448c61 | 634 | mutex_unlock(mutex); |
086c00c7 IK |
635 | return; |
636 | } | |
414985ae TH |
637 | |
638 | if (of) | |
639 | list_del(&of->list); | |
640 | ||
086c00c7 IK |
641 | if (list_empty(&on->files)) { |
642 | rcu_assign_pointer(kn->attr.open, NULL); | |
643 | kfree_rcu(on, rcu_head); | |
644 | } | |
414985ae | 645 | |
41448c61 | 646 | mutex_unlock(mutex); |
414985ae TH |
647 | } |
648 | ||
c637b8ac | 649 | static int kernfs_fop_open(struct inode *inode, struct file *file) |
414985ae | 650 | { |
319ba91d | 651 | struct kernfs_node *kn = inode->i_private; |
555724a8 | 652 | struct kernfs_root *root = kernfs_root(kn); |
414985ae | 653 | const struct kernfs_ops *ops; |
c525aadd | 654 | struct kernfs_open_file *of; |
414985ae TH |
655 | bool has_read, has_write, has_mmap; |
656 | int error = -EACCES; | |
657 | ||
c637b8ac | 658 | if (!kernfs_get_active(kn)) |
414985ae TH |
659 | return -ENODEV; |
660 | ||
324a56e1 | 661 | ops = kernfs_ops(kn); |
414985ae TH |
662 | |
663 | has_read = ops->seq_show || ops->read || ops->mmap; | |
664 | has_write = ops->write || ops->mmap; | |
665 | has_mmap = ops->mmap; | |
666 | ||
555724a8 TH |
667 | /* see the flag definition for details */ |
668 | if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { | |
669 | if ((file->f_mode & FMODE_WRITE) && | |
670 | (!(inode->i_mode & S_IWUGO) || !has_write)) | |
671 | goto err_out; | |
414985ae | 672 | |
555724a8 TH |
673 | if ((file->f_mode & FMODE_READ) && |
674 | (!(inode->i_mode & S_IRUGO) || !has_read)) | |
675 | goto err_out; | |
676 | } | |
414985ae | 677 | |
c525aadd | 678 | /* allocate a kernfs_open_file for the file */ |
414985ae | 679 | error = -ENOMEM; |
c525aadd | 680 | of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); |
414985ae TH |
681 | if (!of) |
682 | goto err_out; | |
683 | ||
684 | /* | |
685 | * The following is done to give a different lockdep key to | |
686 | * @of->mutex for files which implement mmap. This is a rather | |
687 | * crude way to avoid false positive lockdep warning around | |
c1e8d7c6 | 688 | * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and |
414985ae | 689 | * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under |
c1e8d7c6 | 690 | * which mm->mmap_lock nests, while holding @of->mutex. As each |
414985ae TH |
691 | * open file has a separate mutex, it's okay as long as those don't |
692 | * happen on the same file. At this point, we can't easily give | |
693 | * each file a separate locking class. Let's differentiate on | |
694 | * whether the file has mmap or not for now. | |
9b2db6e1 TH |
695 | * |
696 | * Both paths of the branch look the same. They're supposed to | |
697 | * look that way and give @of->mutex different static lockdep keys. | |
414985ae TH |
698 | */ |
699 | if (has_mmap) | |
700 | mutex_init(&of->mutex); | |
701 | else | |
702 | mutex_init(&of->mutex); | |
703 | ||
324a56e1 | 704 | of->kn = kn; |
414985ae TH |
705 | of->file = file; |
706 | ||
b7ce40cf TH |
707 | /* |
708 | * Write path needs to atomic_write_len outside active reference. | |
cc099e0b | 709 | * Cache it in open_file. See kernfs_fop_write_iter() for details. |
b7ce40cf TH |
710 | */ |
711 | of->atomic_write_len = ops->atomic_write_len; | |
712 | ||
4ef67a8c N |
713 | error = -EINVAL; |
714 | /* | |
715 | * ->seq_show is incompatible with ->prealloc, | |
716 | * as seq_read does its own allocation. | |
717 | * ->read must be used instead. | |
718 | */ | |
719 | if (ops->prealloc && ops->seq_show) | |
720 | goto err_free; | |
2b75869b N |
721 | if (ops->prealloc) { |
722 | int len = of->atomic_write_len ?: PAGE_SIZE; | |
723 | of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); | |
724 | error = -ENOMEM; | |
725 | if (!of->prealloc_buf) | |
726 | goto err_free; | |
e4234a1f | 727 | mutex_init(&of->prealloc_mutex); |
2b75869b N |
728 | } |
729 | ||
414985ae TH |
730 | /* |
731 | * Always instantiate seq_file even if read access doesn't use | |
732 | * seq_file or is not requested. This unifies private data access | |
733 | * and readable regular files are the vast majority anyway. | |
734 | */ | |
735 | if (ops->seq_show) | |
736 | error = seq_open(file, &kernfs_seq_ops); | |
737 | else | |
738 | error = seq_open(file, NULL); | |
739 | if (error) | |
740 | goto err_free; | |
741 | ||
0e67db2f TH |
742 | of->seq_file = file->private_data; |
743 | of->seq_file->private = of; | |
414985ae TH |
744 | |
745 | /* seq_file clears PWRITE unconditionally, restore it if WRITE */ | |
746 | if (file->f_mode & FMODE_WRITE) | |
747 | file->f_mode |= FMODE_PWRITE; | |
748 | ||
c637b8ac TH |
749 | /* make sure we have open node struct */ |
750 | error = kernfs_get_open_node(kn, of); | |
414985ae | 751 | if (error) |
0e67db2f TH |
752 | goto err_seq_release; |
753 | ||
754 | if (ops->open) { | |
755 | /* nobody has access to @of yet, skip @of->mutex */ | |
756 | error = ops->open(of); | |
757 | if (error) | |
758 | goto err_put_node; | |
759 | } | |
414985ae TH |
760 | |
761 | /* open succeeded, put active references */ | |
c637b8ac | 762 | kernfs_put_active(kn); |
414985ae TH |
763 | return 0; |
764 | ||
0e67db2f | 765 | err_put_node: |
c1b1352f | 766 | kernfs_unlink_open_file(kn, of); |
0e67db2f | 767 | err_seq_release: |
414985ae TH |
768 | seq_release(inode, file); |
769 | err_free: | |
2b75869b | 770 | kfree(of->prealloc_buf); |
414985ae TH |
771 | kfree(of); |
772 | err_out: | |
c637b8ac | 773 | kernfs_put_active(kn); |
414985ae TH |
774 | return error; |
775 | } | |
776 | ||
0e67db2f TH |
777 | /* used from release/drain to ensure that ->release() is called exactly once */ |
778 | static void kernfs_release_file(struct kernfs_node *kn, | |
779 | struct kernfs_open_file *of) | |
780 | { | |
f83f3c51 TH |
781 | /* |
782 | * @of is guaranteed to have no other file operations in flight and | |
783 | * we just want to synchronize release and drain paths. | |
41448c61 | 784 | * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used |
f83f3c51 TH |
785 | * here because drain path may be called from places which can |
786 | * cause circular dependency. | |
787 | */ | |
41448c61 | 788 | lockdep_assert_held(kernfs_open_file_mutex_ptr(kn)); |
0e67db2f | 789 | |
0e67db2f TH |
790 | if (!of->released) { |
791 | /* | |
792 | * A file is never detached without being released and we | |
793 | * need to be able to release files which are deactivated | |
794 | * and being drained. Don't use kernfs_ops(). | |
795 | */ | |
796 | kn->attr.ops->release(of); | |
797 | of->released = true; | |
798 | } | |
0e67db2f TH |
799 | } |
800 | ||
c637b8ac | 801 | static int kernfs_fop_release(struct inode *inode, struct file *filp) |
414985ae | 802 | { |
319ba91d | 803 | struct kernfs_node *kn = inode->i_private; |
c525aadd | 804 | struct kernfs_open_file *of = kernfs_of(filp); |
41448c61 | 805 | struct mutex *mutex = NULL; |
414985ae | 806 | |
f83f3c51 | 807 | if (kn->flags & KERNFS_HAS_RELEASE) { |
41448c61 | 808 | mutex = kernfs_open_file_mutex_lock(kn); |
f83f3c51 | 809 | kernfs_release_file(kn, of); |
41448c61 | 810 | mutex_unlock(mutex); |
f83f3c51 TH |
811 | } |
812 | ||
c1b1352f | 813 | kernfs_unlink_open_file(kn, of); |
414985ae | 814 | seq_release(inode, filp); |
2b75869b | 815 | kfree(of->prealloc_buf); |
414985ae TH |
816 | kfree(of); |
817 | ||
818 | return 0; | |
819 | } | |
820 | ||
0e67db2f | 821 | void kernfs_drain_open_files(struct kernfs_node *kn) |
414985ae | 822 | { |
c525aadd TH |
823 | struct kernfs_open_node *on; |
824 | struct kernfs_open_file *of; | |
41448c61 | 825 | struct mutex *mutex = NULL; |
414985ae | 826 | |
0e67db2f | 827 | if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) |
55f6e30d GKH |
828 | return; |
829 | ||
c1b1352f IK |
830 | /* |
831 | * lockless opportunistic check is safe below because no one is adding to | |
832 | * ->attr.open at this point of time. This check allows early bail out | |
833 | * if ->attr.open is already NULL. kernfs_unlink_open_file makes | |
834 | * ->attr.open NULL only while holding kernfs_open_file_mutex so below | |
41448c61 | 835 | * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if |
c1b1352f IK |
836 | * ->attr.open became NULL while waiting for the mutex. |
837 | */ | |
086c00c7 | 838 | if (!rcu_access_pointer(kn->attr.open)) |
414985ae TH |
839 | return; |
840 | ||
41448c61 | 841 | mutex = kernfs_open_file_mutex_lock(kn); |
086c00c7 IK |
842 | on = kernfs_deref_open_node_protected(kn); |
843 | if (!on) { | |
41448c61 | 844 | mutex_unlock(mutex); |
bd900901 IK |
845 | return; |
846 | } | |
0e67db2f | 847 | |
c525aadd | 848 | list_for_each_entry(of, &on->files, list) { |
414985ae | 849 | struct inode *inode = file_inode(of->file); |
0e67db2f TH |
850 | |
851 | if (kn->flags & KERNFS_HAS_MMAP) | |
852 | unmap_mapping_range(inode->i_mapping, 0, 0, 1); | |
853 | ||
966fa72a VJ |
854 | if (kn->flags & KERNFS_HAS_RELEASE) |
855 | kernfs_release_file(kn, of); | |
414985ae | 856 | } |
0e67db2f | 857 | |
41448c61 | 858 | mutex_unlock(mutex); |
414985ae TH |
859 | } |
860 | ||
c637b8ac TH |
861 | /* |
862 | * Kernfs attribute files are pollable. The idea is that you read | |
414985ae TH |
863 | * the content and then you use 'poll' or 'select' to wait for |
864 | * the content to change. When the content changes (assuming the | |
865 | * manager for the kobject supports notification), poll will | |
a9a08845 | 866 | * return EPOLLERR|EPOLLPRI, and select will return the fd whether |
414985ae TH |
867 | * it is waiting for read, write, or exceptions. |
868 | * Once poll/select indicates that the value has changed, you | |
869 | * need to close and re-open the file, or seek to 0 and read again. | |
870 | * Reminder: this only works for attributes which actively support | |
871 | * it, and it is not possible to test an attribute from userspace | |
872 | * to see if it supports poll (Neither 'poll' nor 'select' return | |
873 | * an appropriate error code). When in doubt, set a suitable timeout value. | |
874 | */ | |
147e1a97 JW |
875 | __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) |
876 | { | |
877 | struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry); | |
086c00c7 IK |
878 | struct kernfs_open_node *on = kernfs_deref_open_node(of, kn); |
879 | ||
880 | if (!on) | |
881 | return EPOLLERR; | |
147e1a97 JW |
882 | |
883 | poll_wait(of->file, &on->poll, wait); | |
884 | ||
885 | if (of->event != atomic_read(&on->event)) | |
886 | return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; | |
887 | ||
888 | return DEFAULT_POLLMASK; | |
889 | } | |
890 | ||
076ccb76 | 891 | static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) |
414985ae | 892 | { |
c525aadd | 893 | struct kernfs_open_file *of = kernfs_of(filp); |
319ba91d | 894 | struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); |
147e1a97 | 895 | __poll_t ret; |
414985ae | 896 | |
c637b8ac | 897 | if (!kernfs_get_active(kn)) |
147e1a97 | 898 | return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; |
414985ae | 899 | |
147e1a97 JW |
900 | if (kn->attr.ops->poll) |
901 | ret = kn->attr.ops->poll(of, wait); | |
902 | else | |
903 | ret = kernfs_generic_poll(of, wait); | |
414985ae | 904 | |
c637b8ac | 905 | kernfs_put_active(kn); |
147e1a97 | 906 | return ret; |
414985ae TH |
907 | } |
908 | ||
ecca47ce | 909 | static void kernfs_notify_workfn(struct work_struct *work) |
414985ae | 910 | { |
ecca47ce | 911 | struct kernfs_node *kn; |
d911d987 | 912 | struct kernfs_super_info *info; |
393c3714 | 913 | struct kernfs_root *root; |
ecca47ce TH |
914 | repeat: |
915 | /* pop one off the notify_list */ | |
2fd26970 IK |
916 | spin_lock_irq(&kernfs_notify_lock); |
917 | kn = kernfs_notify_list; | |
918 | if (kn == KERNFS_NOTIFY_EOL) { | |
919 | spin_unlock_irq(&kernfs_notify_lock); | |
d911d987 | 920 | return; |
2fd26970 IK |
921 | } |
922 | kernfs_notify_list = kn->attr.notify_next; | |
923 | kn->attr.notify_next = NULL; | |
924 | spin_unlock_irq(&kernfs_notify_lock); | |
d911d987 | 925 | |
393c3714 | 926 | root = kernfs_root(kn); |
d911d987 | 927 | /* kick fsnotify */ |
393c3714 | 928 | down_write(&root->kernfs_rwsem); |
d911d987 | 929 | |
ecca47ce | 930 | list_for_each_entry(info, &kernfs_root(kn)->supers, node) { |
df6a58c5 | 931 | struct kernfs_node *parent; |
497b0c5a | 932 | struct inode *p_inode = NULL; |
d911d987 | 933 | struct inode *inode; |
25b229df | 934 | struct qstr name; |
d911d987 | 935 | |
df6a58c5 TH |
936 | /* |
937 | * We want fsnotify_modify() on @kn but as the | |
938 | * modifications aren't originating from userland don't | |
939 | * have the matching @file available. Look up the inodes | |
940 | * and generate the events manually. | |
941 | */ | |
67c0496e | 942 | inode = ilookup(info->sb, kernfs_ino(kn)); |
d911d987 TH |
943 | if (!inode) |
944 | continue; | |
945 | ||
25b229df | 946 | name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name)); |
df6a58c5 TH |
947 | parent = kernfs_get_parent(kn); |
948 | if (parent) { | |
67c0496e | 949 | p_inode = ilookup(info->sb, kernfs_ino(parent)); |
df6a58c5 | 950 | if (p_inode) { |
40a100d3 AG |
951 | fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD, |
952 | inode, FSNOTIFY_EVENT_INODE, | |
953 | p_inode, &name, inode, 0); | |
df6a58c5 TH |
954 | iput(p_inode); |
955 | } | |
956 | ||
957 | kernfs_put(parent); | |
d911d987 TH |
958 | } |
959 | ||
82ace1ef AG |
960 | if (!p_inode) |
961 | fsnotify_inode(inode, FS_MODIFY); | |
497b0c5a | 962 | |
d911d987 TH |
963 | iput(inode); |
964 | } | |
965 | ||
393c3714 | 966 | up_write(&root->kernfs_rwsem); |
ecca47ce TH |
967 | kernfs_put(kn); |
968 | goto repeat; | |
969 | } | |
970 | ||
971 | /** | |
972 | * kernfs_notify - notify a kernfs file | |
973 | * @kn: file to notify | |
974 | * | |
975 | * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any | |
976 | * context. | |
977 | */ | |
978 | void kernfs_notify(struct kernfs_node *kn) | |
979 | { | |
980 | static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); | |
2fd26970 | 981 | unsigned long flags; |
03c0a920 | 982 | struct kernfs_open_node *on; |
ecca47ce TH |
983 | |
984 | if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) | |
985 | return; | |
986 | ||
03c0a920 | 987 | /* kick poll immediately */ |
086c00c7 IK |
988 | rcu_read_lock(); |
989 | on = rcu_dereference(kn->attr.open); | |
03c0a920 RR |
990 | if (on) { |
991 | atomic_inc(&on->event); | |
992 | wake_up_interruptible(&on->poll); | |
993 | } | |
086c00c7 | 994 | rcu_read_unlock(); |
03c0a920 RR |
995 | |
996 | /* schedule work to kick fsnotify */ | |
2fd26970 IK |
997 | spin_lock_irqsave(&kernfs_notify_lock, flags); |
998 | if (!kn->attr.notify_next) { | |
999 | kernfs_get(kn); | |
1000 | kn->attr.notify_next = kernfs_notify_list; | |
1001 | kernfs_notify_list = kn; | |
1002 | schedule_work(&kernfs_notify_work); | |
1003 | } | |
1004 | spin_unlock_irqrestore(&kernfs_notify_lock, flags); | |
414985ae TH |
1005 | } |
1006 | EXPORT_SYMBOL_GPL(kernfs_notify); | |
1007 | ||
a797bfc3 | 1008 | const struct file_operations kernfs_file_fops = { |
4eaad21a | 1009 | .read_iter = kernfs_fop_read_iter, |
cc099e0b | 1010 | .write_iter = kernfs_fop_write_iter, |
414985ae | 1011 | .llseek = generic_file_llseek, |
c637b8ac TH |
1012 | .mmap = kernfs_fop_mmap, |
1013 | .open = kernfs_fop_open, | |
1014 | .release = kernfs_fop_release, | |
1015 | .poll = kernfs_fop_poll, | |
2a9becdd | 1016 | .fsync = noop_fsync, |
f2d6c270 CH |
1017 | .splice_read = generic_file_splice_read, |
1018 | .splice_write = iter_file_splice_write, | |
414985ae TH |
1019 | }; |
1020 | ||
1021 | /** | |
2063d608 | 1022 | * __kernfs_create_file - kernfs internal function to create a file |
414985ae TH |
1023 | * @parent: directory to create the file in |
1024 | * @name: name of the file | |
1025 | * @mode: mode of the file | |
488dee96 DT |
1026 | * @uid: uid of the file |
1027 | * @gid: gid of the file | |
414985ae TH |
1028 | * @size: size of the file |
1029 | * @ops: kernfs operations for the file | |
1030 | * @priv: private data for the file | |
1031 | * @ns: optional namespace tag of the file | |
1032 | * @key: lockdep key for the file's active_ref, %NULL to disable lockdep | |
1033 | * | |
1034 | * Returns the created node on success, ERR_PTR() value on error. | |
1035 | */ | |
2063d608 TH |
1036 | struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, |
1037 | const char *name, | |
488dee96 DT |
1038 | umode_t mode, kuid_t uid, kgid_t gid, |
1039 | loff_t size, | |
2063d608 TH |
1040 | const struct kernfs_ops *ops, |
1041 | void *priv, const void *ns, | |
2063d608 | 1042 | struct lock_class_key *key) |
414985ae | 1043 | { |
324a56e1 | 1044 | struct kernfs_node *kn; |
2063d608 | 1045 | unsigned flags; |
414985ae TH |
1046 | int rc; |
1047 | ||
2063d608 | 1048 | flags = KERNFS_FILE; |
2063d608 | 1049 | |
488dee96 DT |
1050 | kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, |
1051 | uid, gid, flags); | |
324a56e1 | 1052 | if (!kn) |
414985ae TH |
1053 | return ERR_PTR(-ENOMEM); |
1054 | ||
adc5e8b5 TH |
1055 | kn->attr.ops = ops; |
1056 | kn->attr.size = size; | |
1057 | kn->ns = ns; | |
324a56e1 | 1058 | kn->priv = priv; |
414985ae TH |
1059 | |
1060 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
1061 | if (key) { | |
0f605db5 | 1062 | lockdep_init_map(&kn->dep_map, "kn->active", key, 0); |
df23fc39 | 1063 | kn->flags |= KERNFS_LOCKDEP; |
414985ae TH |
1064 | } |
1065 | #endif | |
1066 | ||
1067 | /* | |
1970a062 | 1068 | * kn->attr.ops is accessible only while holding active ref. We |
414985ae TH |
1069 | * need to know whether some ops are implemented outside active |
1070 | * ref. Cache their existence in flags. | |
1071 | */ | |
1072 | if (ops->seq_show) | |
df23fc39 | 1073 | kn->flags |= KERNFS_HAS_SEQ_SHOW; |
414985ae | 1074 | if (ops->mmap) |
df23fc39 | 1075 | kn->flags |= KERNFS_HAS_MMAP; |
0e67db2f TH |
1076 | if (ops->release) |
1077 | kn->flags |= KERNFS_HAS_RELEASE; | |
414985ae | 1078 | |
988cd7af | 1079 | rc = kernfs_add_one(kn); |
414985ae | 1080 | if (rc) { |
324a56e1 | 1081 | kernfs_put(kn); |
414985ae TH |
1082 | return ERR_PTR(rc); |
1083 | } | |
324a56e1 | 1084 | return kn; |
414985ae | 1085 | } |