drivers/infiniband/hw/hfi1/file_ops.c
1 /*
2 * Copyright(c) 2015, 2016 Intel Corporation.
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * BSD LICENSE
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 *
24 * - Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * - Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
29 * distribution.
30 * - Neither the name of Intel Corporation nor the names of its
31 * contributors may be used to endorse or promote products derived
32 * from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 *
46 */
47 #include <linux/poll.h>
48 #include <linux/cdev.h>
49 #include <linux/vmalloc.h>
50 #include <linux/io.h>
51
52 #include <rdma/ib.h>
53
54 #include "hfi.h"
55 #include "pio.h"
56 #include "device.h"
57 #include "common.h"
58 #include "trace.h"
59 #include "user_sdma.h"
60 #include "user_exp_rcv.h"
61 #include "eprom.h"
62 #include "aspm.h"
63 #include "mmu_rb.h"
64
65 #undef pr_fmt
66 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
67
68 #define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */
69
70 /*
71 * File operation functions
72 */
73 static int hfi1_file_open(struct inode *, struct file *);
74 static int hfi1_file_close(struct inode *, struct file *);
75 static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
76 static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
77 static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
78
79 static u64 kvirt_to_phys(void *);
80 static int assign_ctxt(struct file *, struct hfi1_user_info *);
81 static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
82 static int user_init(struct file *);
83 static int get_ctxt_info(struct file *, void __user *, __u32);
84 static int get_base_info(struct file *, void __user *, __u32);
85 static int setup_ctxt(struct file *);
86 static int setup_subctxt(struct hfi1_ctxtdata *);
87 static int get_user_context(struct file *, struct hfi1_user_info *, int);
88 static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
89 static int allocate_ctxt(struct file *, struct hfi1_devdata *,
90 struct hfi1_user_info *);
91 static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
92 static unsigned int poll_next(struct file *, struct poll_table_struct *);
93 static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
94 static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
95 static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
96 static int vma_fault(struct vm_area_struct *, struct vm_fault *);
97 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
98 unsigned long arg);
99
100 static const struct file_operations hfi1_file_ops = {
101 .owner = THIS_MODULE,
102 .write_iter = hfi1_write_iter,
103 .open = hfi1_file_open,
104 .release = hfi1_file_close,
105 .unlocked_ioctl = hfi1_file_ioctl,
106 .poll = hfi1_poll,
107 .mmap = hfi1_file_mmap,
108 .llseek = noop_llseek,
109 };
110
111 static struct vm_operations_struct vm_ops = {
112 .fault = vma_fault,
113 };
114
115 /*
116 * Types of memories mapped into user processes' space
117 */
118 enum mmap_types {
119 PIO_BUFS = 1,
120 PIO_BUFS_SOP,
121 PIO_CRED,
122 RCV_HDRQ,
123 RCV_EGRBUF,
124 UREGS,
125 EVENTS,
126 STATUS,
127 RTAIL,
128 SUBCTXT_UREGS,
129 SUBCTXT_RCV_HDRQ,
130 SUBCTXT_EGRBUF,
131 SDMA_COMP
132 };
133
134 /*
135 * Masks and offsets defining the mmap tokens
136 */
137 #define HFI1_MMAP_OFFSET_MASK 0xfffULL
138 #define HFI1_MMAP_OFFSET_SHIFT 0
139 #define HFI1_MMAP_SUBCTXT_MASK 0xfULL
140 #define HFI1_MMAP_SUBCTXT_SHIFT 12
141 #define HFI1_MMAP_CTXT_MASK 0xffULL
142 #define HFI1_MMAP_CTXT_SHIFT 16
143 #define HFI1_MMAP_TYPE_MASK 0xfULL
144 #define HFI1_MMAP_TYPE_SHIFT 24
145 #define HFI1_MMAP_MAGIC_MASK 0xffffffffULL
146 #define HFI1_MMAP_MAGIC_SHIFT 32
147
148 #define HFI1_MMAP_MAGIC 0xdabbad00
149
150 #define HFI1_MMAP_TOKEN_SET(field, val) \
151 (((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
152 #define HFI1_MMAP_TOKEN_GET(field, token) \
153 (((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
154 #define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr) \
155 (HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
156 HFI1_MMAP_TOKEN_SET(TYPE, type) | \
157 HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
158 HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
159 HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))
160
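/*
 * Illustrative sketch (not part of the driver): how a 64-bit mmap token
 * built with HFI1_MMAP_TOKEN() decomposes, assuming type = RCV_HDRQ (4),
 * ctxt = 3, subctxt = 1 and a page-aligned address:
 *
 *   bits 63:32  MAGIC    0xdabbad00
 *   bits 27:24  TYPE     0x4
 *   bits 23:16  CTXT     0x03
 *   bits 15:12  SUBCTXT  0x1
 *   bits 11:0   OFFSET   0x000
 *
 * i.e. token == 0xdabbad0004031000ULL.  User space passes such a token
 * back as the mmap(2) offset, and hfi1_file_mmap() recovers the fields
 * with HFI1_MMAP_TOKEN_GET().
 */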
161 #define dbg(fmt, ...) \
162 pr_info(fmt, ##__VA_ARGS__)
163
164 static inline int is_valid_mmap(u64 token)
165 {
166 return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
167 }
168
169 static int hfi1_file_open(struct inode *inode, struct file *fp)
170 {
171 struct hfi1_devdata *dd = container_of(inode->i_cdev,
172 struct hfi1_devdata,
173 user_cdev);
174
175 /* Just take a ref now. Not all opens result in a context assign */
176 kobject_get(&dd->kobj);
177
178 /* The real work is performed later in assign_ctxt() */
179 fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL);
180 if (fp->private_data) /* no cpu affinity by default */
181 ((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1;
182 return fp->private_data ? 0 : -ENOMEM;
183 }
184
185 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
186 unsigned long arg)
187 {
188 struct hfi1_filedata *fd = fp->private_data;
189 struct hfi1_ctxtdata *uctxt = fd->uctxt;
190 struct hfi1_user_info uinfo;
191 struct hfi1_tid_info tinfo;
192 int ret = 0;
193 unsigned long addr;
194 int uval = 0;
195 unsigned long ul_uval = 0;
196 u16 uval16 = 0;
197
198 hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
199 if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
200 cmd != HFI1_IOCTL_GET_VERS &&
201 !uctxt)
202 return -EINVAL;
203
204 switch (cmd) {
205 case HFI1_IOCTL_ASSIGN_CTXT:
206 if (uctxt)
207 return -EINVAL;
208
209 if (copy_from_user(&uinfo,
210 (struct hfi1_user_info __user *)arg,
211 sizeof(uinfo)))
212 return -EFAULT;
213
214 ret = assign_ctxt(fp, &uinfo);
215 if (ret < 0)
216 return ret;
217 ret = setup_ctxt(fp);
218 if (ret)
219 return ret;
220 ret = user_init(fp);
221 break;
222 case HFI1_IOCTL_CTXT_INFO:
223 ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
224 sizeof(struct hfi1_ctxt_info));
225 break;
226 case HFI1_IOCTL_USER_INFO:
227 ret = get_base_info(fp, (void __user *)(unsigned long)arg,
228 sizeof(struct hfi1_base_info));
229 break;
230 case HFI1_IOCTL_CREDIT_UPD:
231 if (uctxt)
232 sc_return_credits(uctxt->sc);
233 break;
234
235 case HFI1_IOCTL_TID_UPDATE:
236 if (copy_from_user(&tinfo,
237 (struct hfi1_tid_info __user *)arg,
238 sizeof(tinfo)))
239 return -EFAULT;
240
241 ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
242 if (!ret) {
243 /*
244 * Copy the number of tidlist entries we used
245 * and the length of the buffer we registered.
246 * These fields are adjacent in the structure so
247 * we can copy them at the same time.
248 */
249 addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
250 if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
251 sizeof(tinfo.tidcnt) +
252 sizeof(tinfo.length)))
253 ret = -EFAULT;
254 }
255 break;
256
257 case HFI1_IOCTL_TID_FREE:
258 if (copy_from_user(&tinfo,
259 (struct hfi1_tid_info __user *)arg,
260 sizeof(tinfo)))
261 return -EFAULT;
262
263 ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
264 if (ret)
265 break;
266 addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
267 if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
268 sizeof(tinfo.tidcnt)))
269 ret = -EFAULT;
270 break;
271
272 case HFI1_IOCTL_TID_INVAL_READ:
273 if (copy_from_user(&tinfo,
274 (struct hfi1_tid_info __user *)arg,
275 sizeof(tinfo)))
276 return -EFAULT;
277
278 ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
279 if (ret)
280 break;
281 addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
282 if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
283 sizeof(tinfo.tidcnt)))
284 ret = -EFAULT;
285 break;
286
287 case HFI1_IOCTL_RECV_CTRL:
288 ret = get_user(uval, (int __user *)arg);
289 if (ret != 0)
290 return -EFAULT;
291 ret = manage_rcvq(uctxt, fd->subctxt, uval);
292 break;
293
294 case HFI1_IOCTL_POLL_TYPE:
295 ret = get_user(uval, (int __user *)arg);
296 if (ret != 0)
297 return -EFAULT;
298 uctxt->poll_type = (typeof(uctxt->poll_type))uval;
299 break;
300
301 case HFI1_IOCTL_ACK_EVENT:
302 ret = get_user(ul_uval, (unsigned long __user *)arg);
303 if (ret != 0)
304 return -EFAULT;
305 ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
306 break;
307
308 case HFI1_IOCTL_SET_PKEY:
309 ret = get_user(uval16, (u16 __user *)arg);
310 if (ret != 0)
311 return -EFAULT;
312 if (HFI1_CAP_IS_USET(PKEY_CHECK))
313 ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
314 else
315 return -EPERM;
316 break;
317
318 case HFI1_IOCTL_CTXT_RESET: {
319 struct send_context *sc;
320 struct hfi1_devdata *dd;
321
322 if (!uctxt || !uctxt->dd || !uctxt->sc)
323 return -EINVAL;
324
325 /*
326 * There is no protection here. User level has to
327 * guarantee that no one will be writing to the send
328 * context while it is being re-initialized.
329 * If user level breaks that guarantee, it will break
330 * its own context and no one else's.
331 */
332 dd = uctxt->dd;
333 sc = uctxt->sc;
334 /*
335 * Wait until the interrupt handler has marked the
336 * context as halted or frozen. Report error if we time
337 * out.
338 */
339 wait_event_interruptible_timeout(
340 sc->halt_wait, (sc->flags & SCF_HALTED),
341 msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
342 if (!(sc->flags & SCF_HALTED))
343 return -ENOLCK;
344
345 /*
346 * If the send context was halted due to a Freeze,
347 * wait until the device has been "unfrozen" before
348 * resetting the context.
349 */
350 if (sc->flags & SCF_FROZEN) {
351 wait_event_interruptible_timeout(
352 dd->event_queue,
353 !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
354 msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
355 if (dd->flags & HFI1_FROZEN)
356 return -ENOLCK;
357
358 if (dd->flags & HFI1_FORCED_FREEZE)
359 /*
360 * Don't allow a context reset while we are in
361 * a forced freeze
362 */
363 return -ENODEV;
364
365 sc_disable(sc);
366 ret = sc_enable(sc);
367 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
368 uctxt->ctxt);
369 } else {
370 ret = sc_restart(sc);
371 }
372 if (!ret)
373 sc_return_credits(sc);
374 break;
375 }
376
377 case HFI1_IOCTL_GET_VERS:
378 uval = HFI1_USER_SWVERSION;
379 if (put_user(uval, (int __user *)arg))
380 return -EFAULT;
381 break;
382
383 default:
384 return -EINVAL;
385 }
386
387 return ret;
388 }
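/*
 * Illustrative user-space sketch (not part of the driver): the ioctl
 * sequence a process is expected to follow to obtain a context.  The
 * ioctl numbers and structures are the ones handled above; the device
 * node name and field initializers are assumptions for the example.
 *
 *   int fd = open("/dev/hfi1_0", O_RDWR);
 *
 *   struct hfi1_user_info uinfo = {
 *           .userversion = HFI1_USER_SWVERSION,
 *           .subctxt_cnt = 0,                    -- no context sharing
 *   };
 *   struct hfi1_ctxt_info cinfo;
 *   struct hfi1_base_info binfo;
 *
 *   ioctl(fd, HFI1_IOCTL_ASSIGN_CTXT, &uinfo);   -- assign + setup + init
 *   ioctl(fd, HFI1_IOCTL_CTXT_INFO, &cinfo);     -- context parameters
 *   ioctl(fd, HFI1_IOCTL_USER_INFO, &binfo);     -- mmap tokens (see below)
 */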
389
390 static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
391 {
392 struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
393 struct hfi1_user_sdma_pkt_q *pq = fd->pq;
394 struct hfi1_user_sdma_comp_q *cq = fd->cq;
395 int ret = 0, done = 0, reqs = 0;
396 unsigned long dim = from->nr_segs;
397
398 if (!cq || !pq) {
399 ret = -EIO;
400 goto done;
401 }
402
403 if (!iter_is_iovec(from) || !dim) {
404 ret = -EINVAL;
405 goto done;
406 }
407
408 hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
409 fd->uctxt->ctxt, fd->subctxt, dim);
410
411 if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
412 ret = -ENOSPC;
413 goto done;
414 }
415
416 while (dim) {
417 unsigned long count = 0;
418
419 ret = hfi1_user_sdma_process_request(
420 kiocb->ki_filp, (struct iovec *)(from->iov + done),
421 dim, &count);
422 if (ret)
423 goto done;
424 dim -= count;
425 done += count;
426 reqs++;
427 }
428 done:
429 return ret ? ret : reqs;
430 }
431
432 static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
433 {
434 struct hfi1_filedata *fd = fp->private_data;
435 struct hfi1_ctxtdata *uctxt = fd->uctxt;
436 struct hfi1_devdata *dd;
437 unsigned long flags, pfn;
438 u64 token = vma->vm_pgoff << PAGE_SHIFT,
439 memaddr = 0;
440 u8 subctxt, mapio = 0, vmf = 0, type;
441 ssize_t memlen = 0;
442 int ret = 0;
443 u16 ctxt;
444
445 if (!is_valid_mmap(token) || !uctxt ||
446 !(vma->vm_flags & VM_SHARED)) {
447 ret = -EINVAL;
448 goto done;
449 }
450 dd = uctxt->dd;
451 ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
452 subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
453 type = HFI1_MMAP_TOKEN_GET(TYPE, token);
454 if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
455 ret = -EINVAL;
456 goto done;
457 }
458
459 flags = vma->vm_flags;
460
461 switch (type) {
462 case PIO_BUFS:
463 case PIO_BUFS_SOP:
464 memaddr = ((dd->physaddr + TXE_PIO_SEND) +
465 /* chip pio base */
466 (uctxt->sc->hw_context * BIT(16))) +
467 /* 64K PIO space / ctxt */
468 (type == PIO_BUFS_SOP ?
469 (TXE_PIO_SIZE / 2) : 0); /* sop? */
470 /*
471 * Map only the amount allocated to the context, not the
472 * entire available context's PIO space.
473 */
474 memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
475 flags &= ~VM_MAYREAD;
476 flags |= VM_DONTCOPY | VM_DONTEXPAND;
477 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
478 mapio = 1;
479 break;
480 case PIO_CRED:
481 if (flags & VM_WRITE) {
482 ret = -EPERM;
483 goto done;
484 }
485 /*
486 * The credit return location for this context could be on the
487 * second or third page allocated for credit returns (if number
488 * of enabled contexts > 64 and 128 respectively).
489 */
490 memaddr = dd->cr_base[uctxt->numa_id].pa +
491 (((u64)uctxt->sc->hw_free -
492 (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
493 memlen = PAGE_SIZE;
494 flags &= ~VM_MAYWRITE;
495 flags |= VM_DONTCOPY | VM_DONTEXPAND;
496 /*
497 * The driver has already allocated memory for credit
498 * returns and programmed it into the chip. Has that
499 * memory been flagged as non-cached?
500 */
501 /* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
502 mapio = 1;
503 break;
504 case RCV_HDRQ:
505 memaddr = uctxt->rcvhdrq_phys;
506 memlen = uctxt->rcvhdrq_size;
507 break;
508 case RCV_EGRBUF: {
509 unsigned long addr;
510 int i;
511 /*
512 * The RcvEgr buffers need to be handled differently
513 * as multiple non-contiguous pages need to be mapped
514 * into the user process.
515 */
516 memlen = uctxt->egrbufs.size;
517 if ((vma->vm_end - vma->vm_start) != memlen) {
518 dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
519 (vma->vm_end - vma->vm_start), memlen);
520 ret = -EINVAL;
521 goto done;
522 }
523 if (vma->vm_flags & VM_WRITE) {
524 ret = -EPERM;
525 goto done;
526 }
527 vma->vm_flags &= ~VM_MAYWRITE;
528 addr = vma->vm_start;
529 for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
530 ret = remap_pfn_range(
531 vma, addr,
532 uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT,
533 uctxt->egrbufs.buffers[i].len,
534 vma->vm_page_prot);
535 if (ret < 0)
536 goto done;
537 addr += uctxt->egrbufs.buffers[i].len;
538 }
539 ret = 0;
540 goto done;
541 }
542 case UREGS:
543 /*
544 * Map only the page that contains this context's user
545 * registers.
546 */
547 memaddr = (unsigned long)
548 (dd->physaddr + RXE_PER_CONTEXT_USER)
549 + (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
550 /*
551 * TidFlow table is on the same page as the rest of the
552 * user registers.
553 */
554 memlen = PAGE_SIZE;
555 flags |= VM_DONTCOPY | VM_DONTEXPAND;
556 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
557 mapio = 1;
558 break;
559 case EVENTS:
560 /*
561 * Use the page where this context's flags are. User level
562 * knows where its own bitmap is within the page.
563 */
564 memaddr = (unsigned long)(dd->events +
565 ((uctxt->ctxt - dd->first_user_ctxt) *
566 HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
567 memlen = PAGE_SIZE;
568 /*
569 * v3.7 removes VM_RESERVED but the effect is kept by
570 * using VM_IO.
571 */
572 flags |= VM_IO | VM_DONTEXPAND;
573 vmf = 1;
574 break;
575 case STATUS:
576 memaddr = kvirt_to_phys((void *)dd->status);
577 memlen = PAGE_SIZE;
578 flags |= VM_IO | VM_DONTEXPAND;
579 break;
580 case RTAIL:
581 if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
582 /*
583 * If the memory allocation failed, the context alloc
584 * also would have failed, so we would never get here
585 */
586 ret = -EINVAL;
587 goto done;
588 }
589 if (flags & VM_WRITE) {
590 ret = -EPERM;
591 goto done;
592 }
593 memaddr = uctxt->rcvhdrqtailaddr_phys;
594 memlen = PAGE_SIZE;
595 flags &= ~VM_MAYWRITE;
596 break;
597 case SUBCTXT_UREGS:
598 memaddr = (u64)uctxt->subctxt_uregbase;
599 memlen = PAGE_SIZE;
600 flags |= VM_IO | VM_DONTEXPAND;
601 vmf = 1;
602 break;
603 case SUBCTXT_RCV_HDRQ:
604 memaddr = (u64)uctxt->subctxt_rcvhdr_base;
605 memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
606 flags |= VM_IO | VM_DONTEXPAND;
607 vmf = 1;
608 break;
609 case SUBCTXT_EGRBUF:
610 memaddr = (u64)uctxt->subctxt_rcvegrbuf;
611 memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
612 flags |= VM_IO | VM_DONTEXPAND;
613 flags &= ~VM_MAYWRITE;
614 vmf = 1;
615 break;
616 case SDMA_COMP: {
617 struct hfi1_user_sdma_comp_q *cq = fd->cq;
618
619 if (!cq) {
620 ret = -EFAULT;
621 goto done;
622 }
623 memaddr = (u64)cq->comps;
624 memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
625 flags |= VM_IO | VM_DONTEXPAND;
626 vmf = 1;
627 break;
628 }
629 default:
630 ret = -EINVAL;
631 break;
632 }
633
634 if ((vma->vm_end - vma->vm_start) != memlen) {
635 hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
636 uctxt->ctxt, fd->subctxt,
637 (vma->vm_end - vma->vm_start), memlen);
638 ret = -EINVAL;
639 goto done;
640 }
641
642 vma->vm_flags = flags;
643 hfi1_cdbg(PROC,
644 "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
645 ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
646 vma->vm_end - vma->vm_start, vma->vm_flags);
647 pfn = (unsigned long)(memaddr >> PAGE_SHIFT);
648 if (vmf) {
649 vma->vm_pgoff = pfn;
650 vma->vm_ops = &vm_ops;
651 ret = 0;
652 } else if (mapio) {
653 ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen,
654 vma->vm_page_prot);
655 } else {
656 ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen,
657 vma->vm_page_prot);
658 }
659 done:
660 return ret;
661 }
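/*
 * Illustrative user-space sketch (not part of the driver): mapping one of
 * the regions above with a token returned by HFI1_IOCTL_USER_INFO.  The
 * token is passed as the mmap(2) offset; its low 12 bits carry
 * offset_in_page() rather than a file offset, so user space is expected
 * to mask them off for the call and add them back afterwards (error
 * handling omitted; the variable names are assumptions).
 *
 *   __u64 token = binfo.sc_credits_addr;         -- PIO_CRED, read-only
 *   __u64 off = token & 0xfff;                   -- HFI1_MMAP_OFFSET_MASK
 *   void *page = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd,
 *                     token & ~(__u64)0xfff);
 *   const volatile __u64 *credit_ret =
 *           (const volatile __u64 *)((char *)page + off);
 */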
662
663 /*
664 * Local (non-chip) user memory is not mapped right away but only as it is
665 * accessed by the user-level code.
666 */
667 static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
668 {
669 struct page *page;
670
671 page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
672 if (!page)
673 return VM_FAULT_SIGBUS;
674
675 get_page(page);
676 vmf->page = page;
677
678 return 0;
679 }
680
681 static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
682 {
683 struct hfi1_ctxtdata *uctxt;
684 unsigned pollflag;
685
686 uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
687 if (!uctxt)
688 pollflag = POLLERR;
689 else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
690 pollflag = poll_urgent(fp, pt);
691 else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
692 pollflag = poll_next(fp, pt);
693 else /* invalid */
694 pollflag = POLLERR;
695
696 return pollflag;
697 }
698
699 static int hfi1_file_close(struct inode *inode, struct file *fp)
700 {
701 struct hfi1_filedata *fdata = fp->private_data;
702 struct hfi1_ctxtdata *uctxt = fdata->uctxt;
703 struct hfi1_devdata *dd = container_of(inode->i_cdev,
704 struct hfi1_devdata,
705 user_cdev);
706 unsigned long flags, *ev;
707
708 fp->private_data = NULL;
709
710 if (!uctxt)
711 goto done;
712
713 hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
714 mutex_lock(&hfi1_mutex);
715
716 flush_wc();
717 /* drain user sdma queue */
718 hfi1_user_sdma_free_queues(fdata);
719
720 /* release the cpu */
721 hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
722
723 /*
724 * Clear any left over, unhandled events so the next process that
725 * gets this context doesn't get confused.
726 */
727 ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
728 HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
729 *ev = 0;
730
731 if (--uctxt->cnt) {
732 uctxt->active_slaves &= ~(1 << fdata->subctxt);
733 uctxt->subpid[fdata->subctxt] = 0;
734 mutex_unlock(&hfi1_mutex);
735 goto done;
736 }
737
738 spin_lock_irqsave(&dd->uctxt_lock, flags);
739 /*
740 * Disable receive context and interrupt available, reset all
741 * RcvCtxtCtrl bits to default values.
742 */
743 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
744 HFI1_RCVCTRL_TIDFLOW_DIS |
745 HFI1_RCVCTRL_INTRAVAIL_DIS |
746 HFI1_RCVCTRL_TAILUPD_DIS |
747 HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
748 HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
749 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
750 /* Clear the context's J_KEY */
751 hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
752 /*
753 * Reset context integrity checks to default.
754 * (writes to CSRs probably belong in chip.c)
755 */
756 write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
757 hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
758 sc_disable(uctxt->sc);
759 uctxt->pid = 0;
760 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
761
762 dd->rcd[uctxt->ctxt] = NULL;
763
764 hfi1_user_exp_rcv_free(fdata);
765 hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
766
767 uctxt->rcvwait_to = 0;
768 uctxt->piowait_to = 0;
769 uctxt->rcvnowait = 0;
770 uctxt->pionowait = 0;
771 uctxt->event_flags = 0;
772
773 hfi1_stats.sps_ctxts--;
774 if (++dd->freectxts == dd->num_user_contexts)
775 aspm_enable_all(dd);
776 mutex_unlock(&hfi1_mutex);
777 hfi1_free_ctxtdata(dd, uctxt);
778 done:
779 kobject_put(&dd->kobj);
780 kfree(fdata);
781 return 0;
782 }
783
784 /*
785 * Convert kernel *virtual* addresses to physical addresses.
786 * This is used for vmalloc'ed addresses.
787 */
788 static u64 kvirt_to_phys(void *addr)
789 {
790 struct page *page;
791 u64 paddr = 0;
792
793 page = vmalloc_to_page(addr);
794 if (page)
795 paddr = page_to_pfn(page) << PAGE_SHIFT;
796
797 return paddr;
798 }
799
800 static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
801 {
802 int i_minor, ret = 0;
803 unsigned int swmajor, swminor;
804
805 swmajor = uinfo->userversion >> 16;
806 if (swmajor != HFI1_USER_SWMAJOR) {
807 ret = -ENODEV;
808 goto done;
809 }
810
811 swminor = uinfo->userversion & 0xffff;
812
813 mutex_lock(&hfi1_mutex);
814 /* First, let's check if we need to set up a shared context. */
815 if (uinfo->subctxt_cnt) {
816 struct hfi1_filedata *fd = fp->private_data;
817
818 ret = find_shared_ctxt(fp, uinfo);
819 if (ret < 0)
820 goto done_unlock;
821 if (ret)
822 fd->rec_cpu_num = hfi1_get_proc_affinity(
823 fd->uctxt->dd, fd->uctxt->numa_id);
824 }
825
826 /*
827 * We execute the following block if we couldn't find a
828 * shared context or if context sharing is not required.
829 */
830 if (!ret) {
831 i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
832 ret = get_user_context(fp, uinfo, i_minor);
833 }
834 done_unlock:
835 mutex_unlock(&hfi1_mutex);
836 done:
837 return ret;
838 }
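/*
 * Context-sharing sketch (illustrative, not part of the driver): a master
 * process requests shared contexts by setting subctxt_cnt together with a
 * uuid/subctxt_id pair of its choosing; peers that present the same
 * values (and run as the same user, so the J_KEYs match) are attached to
 * the master's context by find_shared_ctxt() instead of allocating their
 * own.
 *
 *   master:  uinfo = { .subctxt_cnt = 2, .subctxt_id = id, .uuid = ... };
 *            ioctl(fd0, HFI1_IOCTL_ASSIGN_CTXT, &uinfo);  -- allocates ctxt
 *   peer:    same uinfo on its own fd;
 *            ioctl(fd1, HFI1_IOCTL_ASSIGN_CTXT, &uinfo);  -- joins as slave
 */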
839
840 static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
841 int devno)
842 {
843 struct hfi1_devdata *dd = NULL;
844 int devmax, npresent, nup;
845
846 devmax = hfi1_count_units(&npresent, &nup);
847 if (!npresent)
848 return -ENXIO;
849
850 if (!nup)
851 return -ENETDOWN;
852
853 dd = hfi1_lookup(devno);
854 if (!dd)
855 return -ENODEV;
856 else if (!dd->freectxts)
857 return -EBUSY;
858
859 return allocate_ctxt(fp, dd, uinfo);
860 }
861
862 static int find_shared_ctxt(struct file *fp,
863 const struct hfi1_user_info *uinfo)
864 {
865 int devmax, ndev, i;
866 int ret = 0;
867 struct hfi1_filedata *fd = fp->private_data;
868
869 devmax = hfi1_count_units(NULL, NULL);
870
871 for (ndev = 0; ndev < devmax; ndev++) {
872 struct hfi1_devdata *dd = hfi1_lookup(ndev);
873
874 if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
875 continue;
876 for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
877 struct hfi1_ctxtdata *uctxt = dd->rcd[i];
878
879 /* Skip ctxts which are not yet open */
880 if (!uctxt || !uctxt->cnt)
881 continue;
882 /* Skip ctxt if it doesn't match the requested one */
883 if (memcmp(uctxt->uuid, uinfo->uuid,
884 sizeof(uctxt->uuid)) ||
885 uctxt->jkey != generate_jkey(current_uid()) ||
886 uctxt->subctxt_id != uinfo->subctxt_id ||
887 uctxt->subctxt_cnt != uinfo->subctxt_cnt)
888 continue;
889
890 /* Verify the sharing process matches the master */
891 if (uctxt->userversion != uinfo->userversion ||
892 uctxt->cnt >= uctxt->subctxt_cnt) {
893 ret = -EINVAL;
894 goto done;
895 }
896 fd->uctxt = uctxt;
897 fd->subctxt = uctxt->cnt++;
898 uctxt->subpid[fd->subctxt] = current->pid;
899 uctxt->active_slaves |= 1 << fd->subctxt;
900 ret = 1;
901 goto done;
902 }
903 }
904
905 done:
906 return ret;
907 }
908
909 static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
910 struct hfi1_user_info *uinfo)
911 {
912 struct hfi1_filedata *fd = fp->private_data;
913 struct hfi1_ctxtdata *uctxt;
914 unsigned ctxt;
915 int ret, numa;
916
917 if (dd->flags & HFI1_FROZEN) {
918 /*
919 * Pick an error that is unique from all other errors
920 * that are returned so the user process knows that
921 * it tried to allocate while the SPC was frozen. It
922 * should be able to retry with success in a short
923 * while.
924 */
925 return -EIO;
926 }
927
928 for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
929 if (!dd->rcd[ctxt])
930 break;
931
932 if (ctxt == dd->num_rcv_contexts)
933 return -EBUSY;
934
935 fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
936 if (fd->rec_cpu_num != -1)
937 numa = cpu_to_node(fd->rec_cpu_num);
938 else
939 numa = numa_node_id();
940 uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
941 if (!uctxt) {
942 dd_dev_err(dd,
943 "Unable to allocate ctxtdata memory, failing open\n");
944 return -ENOMEM;
945 }
946 hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
947 uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
948 uctxt->numa_id);
949
950 /*
951 * Allocate and enable a PIO send context.
952 */
953 uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
954 uctxt->dd->node);
955 if (!uctxt->sc)
956 return -ENOMEM;
957
958 hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
959 uctxt->sc->hw_context);
960 ret = sc_enable(uctxt->sc);
961 if (ret)
962 return ret;
963 /*
964 * Setup shared context resources if the user-level has requested
965 * shared contexts and this is the 'master' process.
966 * This has to be done here so the rest of the sub-contexts find the
967 * proper master.
968 */
969 if (uinfo->subctxt_cnt && !fd->subctxt) {
970 ret = init_subctxts(uctxt, uinfo);
971 /*
972 * On error, we don't need to disable and de-allocate the
973 * send context because it will be done during file close
974 */
975 if (ret)
976 return ret;
977 }
978 uctxt->userversion = uinfo->userversion;
979 uctxt->pid = current->pid;
980 uctxt->flags = HFI1_CAP_UGET(MASK);
981 init_waitqueue_head(&uctxt->wait);
982 strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
983 memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
984 uctxt->jkey = generate_jkey(current_uid());
985 INIT_LIST_HEAD(&uctxt->sdma_queues);
986 spin_lock_init(&uctxt->sdma_qlock);
987 hfi1_stats.sps_ctxts++;
988 /*
989 * Disable ASPM when there are open user/PSM contexts to avoid
990 * issues with ASPM L1 exit latency
991 */
992 if (dd->freectxts-- == dd->num_user_contexts)
993 aspm_disable_all(dd);
994 fd->uctxt = uctxt;
995
996 return 0;
997 }
998
999 static int init_subctxts(struct hfi1_ctxtdata *uctxt,
1000 const struct hfi1_user_info *uinfo)
1001 {
1002 unsigned num_subctxts;
1003
1004 num_subctxts = uinfo->subctxt_cnt;
1005 if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
1006 return -EINVAL;
1007
1008 uctxt->subctxt_cnt = uinfo->subctxt_cnt;
1009 uctxt->subctxt_id = uinfo->subctxt_id;
1010 uctxt->active_slaves = 1;
1011 uctxt->redirect_seq_cnt = 1;
1012 set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
1013
1014 return 0;
1015 }
1016
1017 static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
1018 {
1019 int ret = 0;
1020 unsigned num_subctxts = uctxt->subctxt_cnt;
1021
1022 uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
1023 if (!uctxt->subctxt_uregbase) {
1024 ret = -ENOMEM;
1025 goto bail;
1026 }
1027 /* We can take the size of the RcvHdr Queue from the master */
1028 uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
1029 num_subctxts);
1030 if (!uctxt->subctxt_rcvhdr_base) {
1031 ret = -ENOMEM;
1032 goto bail_ureg;
1033 }
1034
1035 uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
1036 num_subctxts);
1037 if (!uctxt->subctxt_rcvegrbuf) {
1038 ret = -ENOMEM;
1039 goto bail_rhdr;
1040 }
1041 goto bail;
1042 bail_rhdr:
1043 vfree(uctxt->subctxt_rcvhdr_base);
1044 bail_ureg:
1045 vfree(uctxt->subctxt_uregbase);
1046 uctxt->subctxt_uregbase = NULL;
1047 bail:
1048 return ret;
1049 }
1050
1051 static int user_init(struct file *fp)
1052 {
1053 unsigned int rcvctrl_ops = 0;
1054 struct hfi1_filedata *fd = fp->private_data;
1055 struct hfi1_ctxtdata *uctxt = fd->uctxt;
1056
1057 /* make sure that the context has already been set up */
1058 if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
1059 return -EFAULT;
1060
1061 /* initialize poll variables... */
1062 uctxt->urgent = 0;
1063 uctxt->urgent_poll = 0;
1064
1065 /*
1066 * Now enable the ctxt for receive.
1067 * For chips that are set to DMA the tail register to memory
1068 * when it changes (and when the update bit transitions from
1069 * 0 to 1), we turn it off and then back on.
1070 * This will (very briefly) affect any other open ctxts, but the
1071 * duration is very short, and therefore isn't an issue. We
1072 * explicitly set the in-memory tail copy to 0 beforehand, so we
1073 * don't have to wait to be sure the DMA update has happened
1074 * (chip resets head/tail to 0 on transition to enable).
1075 */
1076 if (uctxt->rcvhdrtail_kvaddr)
1077 clear_rcvhdrtail(uctxt);
1078
1079 /* Setup J_KEY before enabling the context */
1080 hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);
1081
1082 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
1083 if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP))
1084 rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
1085 /*
1086 * Ignore the bit in the flags for now until proper
1087 * support for multiple packet per rcv array entry is
1088 * added.
1089 */
1090 if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
1091 rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
1092 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
1093 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
1094 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
1095 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
1096 /*
1097 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
1098 * We can't rely on the correct value to be set from prior
1099 * uses of the chip or ctxt. Therefore, add the rcvctrl op
1100 * for both cases.
1101 */
1102 if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
1103 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
1104 else
1105 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
1106 hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
1107
1108 /* Notify any waiting slaves */
1109 if (uctxt->subctxt_cnt) {
1110 clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
1111 wake_up(&uctxt->wait);
1112 }
1113
1114 return 0;
1115 }
1116
1117 static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
1118 {
1119 struct hfi1_ctxt_info cinfo;
1120 struct hfi1_filedata *fd = fp->private_data;
1121 struct hfi1_ctxtdata *uctxt = fd->uctxt;
1122 int ret = 0;
1123
1124 memset(&cinfo, 0, sizeof(cinfo));
1125 ret = hfi1_get_base_kinfo(uctxt, &cinfo);
1126 if (ret < 0)
1127 goto done;
1128 cinfo.num_active = hfi1_count_active_units();
1129 cinfo.unit = uctxt->dd->unit;
1130 cinfo.ctxt = uctxt->ctxt;
1131 cinfo.subctxt = fd->subctxt;
1132 cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
1133 uctxt->dd->rcv_entries.group_size) +
1134 uctxt->expected_count;
1135 cinfo.credits = uctxt->sc->credits;
1136 cinfo.numa_node = uctxt->numa_id;
1137 cinfo.rec_cpu = fd->rec_cpu_num;
1138 cinfo.send_ctxt = uctxt->sc->hw_context;
1139
1140 cinfo.egrtids = uctxt->egrbufs.alloced;
1141 cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
1142 cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
1143 cinfo.sdma_ring_size = fd->cq->nentries;
1144 cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
1145
1146 trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
1147 if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
1148 ret = -EFAULT;
1149 done:
1150 return ret;
1151 }
1152
1153 static int setup_ctxt(struct file *fp)
1154 {
1155 struct hfi1_filedata *fd = fp->private_data;
1156 struct hfi1_ctxtdata *uctxt = fd->uctxt;
1157 struct hfi1_devdata *dd = uctxt->dd;
1158 int ret = 0;
1159
1160 /*
1161 * Context should be set up only once, including allocation and
1162 * programming of eager buffers. This is done either when context
1163 * sharing is not requested, or by the master process.
1164 */
1165 if (!uctxt->subctxt_cnt || !fd->subctxt) {
1166 ret = hfi1_init_ctxt(uctxt->sc);
1167 if (ret)
1168 goto done;
1169
1170 /* Now allocate the RcvHdr queue and eager buffers. */
1171 ret = hfi1_create_rcvhdrq(dd, uctxt);
1172 if (ret)
1173 goto done;
1174 ret = hfi1_setup_eagerbufs(uctxt);
1175 if (ret)
1176 goto done;
1177 if (uctxt->subctxt_cnt && !fd->subctxt) {
1178 ret = setup_subctxt(uctxt);
1179 if (ret)
1180 goto done;
1181 }
1182 } else {
1183 ret = wait_event_interruptible(uctxt->wait, !test_bit(
1184 HFI1_CTXT_MASTER_UNINIT,
1185 &uctxt->event_flags));
1186 if (ret)
1187 goto done;
1188 }
1189
1190 ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
1191 if (ret)
1192 goto done;
1193 /*
1194 * Expected receive has to be setup for all processes (including
1195 * shared contexts). However, it has to be done after the master
1196 * context has been fully configured as it depends on the
1197 * eager/expected split of the RcvArray entries.
1198 * Setting it up here ensures that the subcontexts will be waiting
1199 * (due to the above wait_event_interruptible()) until the master
1200 * is set up.
1201 */
1202 ret = hfi1_user_exp_rcv_init(fp);
1203 if (ret)
1204 goto done;
1205
1206 set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
1207 done:
1208 return ret;
1209 }
1210
1211 static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
1212 {
1213 struct hfi1_base_info binfo;
1214 struct hfi1_filedata *fd = fp->private_data;
1215 struct hfi1_ctxtdata *uctxt = fd->uctxt;
1216 struct hfi1_devdata *dd = uctxt->dd;
1217 ssize_t sz;
1218 unsigned offset;
1219 int ret = 0;
1220
1221 trace_hfi1_uctxtdata(uctxt->dd, uctxt);
1222
1223 memset(&binfo, 0, sizeof(binfo));
1224 binfo.hw_version = dd->revision;
1225 binfo.sw_version = HFI1_KERN_SWVERSION;
1226 binfo.bthqp = kdeth_qp;
1227 binfo.jkey = uctxt->jkey;
1228 /*
1229 * If more than 64 contexts are enabled the allocated credit
1230 * return will span two or three contiguous pages. Since we only
1231 * map the page containing the context's credit return address,
1232 * we need to calculate the offset in the proper page.
1233 */
1234 offset = ((u64)uctxt->sc->hw_free -
1235 (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
1236 binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
1237 fd->subctxt, offset);
1238 binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
1239 fd->subctxt,
1240 uctxt->sc->base_addr);
1241 binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
1242 uctxt->ctxt,
1243 fd->subctxt,
1244 uctxt->sc->base_addr);
1245 binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
1246 fd->subctxt,
1247 uctxt->rcvhdrq);
1248 binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
1249 fd->subctxt,
1250 uctxt->egrbufs.rcvtids[0].phys);
1251 binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
1252 fd->subctxt, 0);
1253 /*
1254 * user regs are at
1255 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
1256 */
1257 binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
1258 fd->subctxt, 0);
1259 offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
1260 HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
1261 sizeof(*dd->events));
1262 binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
1263 fd->subctxt,
1264 offset);
1265 binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
1266 fd->subctxt,
1267 dd->status);
1268 if (HFI1_CAP_IS_USET(DMA_RTAIL))
1269 binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
1270 fd->subctxt, 0);
1271 if (uctxt->subctxt_cnt) {
1272 binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
1273 uctxt->ctxt,
1274 fd->subctxt, 0);
1275 binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
1276 uctxt->ctxt,
1277 fd->subctxt, 0);
1278 binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
1279 uctxt->ctxt,
1280 fd->subctxt, 0);
1281 }
1282 sz = (len < sizeof(binfo)) ? len : sizeof(binfo);
1283 if (copy_to_user(ubase, &binfo, sz))
1284 ret = -EFAULT;
1285 return ret;
1286 }
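/*
 * Worked example (illustrative, not part of the driver) of the credit
 * return arithmetic above, assuming 4 KiB pages and the 64-byte
 * credit-return entries implied by the "> 64"/"128" comments: an
 * hw_free entry 70 entries from cr_base.va sits at byte 70 * 64 = 4480,
 * i.e. on the second page with an in-page offset of 384.  The "& PAGE_MASK"
 * in hfi1_file_mmap() selects that page for the PIO_CRED mapping, while
 * the "% PAGE_SIZE" here puts 384 into the token's offset bits so user
 * space can locate the entry within the mapped page.
 */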
1287
1288 static unsigned int poll_urgent(struct file *fp,
1289 struct poll_table_struct *pt)
1290 {
1291 struct hfi1_filedata *fd = fp->private_data;
1292 struct hfi1_ctxtdata *uctxt = fd->uctxt;
1293 struct hfi1_devdata *dd = uctxt->dd;
1294 unsigned pollflag;
1295
1296 poll_wait(fp, &uctxt->wait, pt);
1297
1298 spin_lock_irq(&dd->uctxt_lock);
1299 if (uctxt->urgent != uctxt->urgent_poll) {
1300 pollflag = POLLIN | POLLRDNORM;
1301 uctxt->urgent_poll = uctxt->urgent;
1302 } else {
1303 pollflag = 0;
1304 set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
1305 }
1306 spin_unlock_irq(&dd->uctxt_lock);
1307
1308 return pollflag;
1309 }
1310
1311 static unsigned int poll_next(struct file *fp,
1312 struct poll_table_struct *pt)
1313 {
1314 struct hfi1_filedata *fd = fp->private_data;
1315 struct hfi1_ctxtdata *uctxt = fd->uctxt;
1316 struct hfi1_devdata *dd = uctxt->dd;
1317 unsigned pollflag;
1318
1319 poll_wait(fp, &uctxt->wait, pt);
1320
1321 spin_lock_irq(&dd->uctxt_lock);
1322 if (hdrqempty(uctxt)) {
1323 set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
1324 hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
1325 pollflag = 0;
1326 } else {
1327 pollflag = POLLIN | POLLRDNORM;
1328 }
1329 spin_unlock_irq(&dd->uctxt_lock);
1330
1331 return pollflag;
1332 }
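/*
 * Illustrative user-space sketch (not part of the driver): choosing a
 * poll mode and waiting on it.  HFI1_POLL_TYPE_ANYRCV reports POLLIN when
 * the receive header queue is non-empty (poll_next() above),
 * HFI1_POLL_TYPE_URGENT when an urgent packet has arrived (poll_urgent()).
 * The constants are the ones tested in hfi1_poll(); the rest is an
 * assumed example.
 *
 *   int ptype = HFI1_POLL_TYPE_ANYRCV;
 *   ioctl(fd, HFI1_IOCTL_POLL_TYPE, &ptype);
 *
 *   struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *   poll(&pfd, 1, timeout_ms);
 */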
1333
1334 /*
1335 * Find all user contexts in use, and set the specified bit in their
1336 * event mask.
1337 * See also find_ctxt() for a similar use, that is specific to send buffers.
1338 */
1339 int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
1340 {
1341 struct hfi1_ctxtdata *uctxt;
1342 struct hfi1_devdata *dd = ppd->dd;
1343 unsigned ctxt;
1344 int ret = 0;
1345 unsigned long flags;
1346
1347 if (!dd->events) {
1348 ret = -EINVAL;
1349 goto done;
1350 }
1351
1352 spin_lock_irqsave(&dd->uctxt_lock, flags);
1353 for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
1354 ctxt++) {
1355 uctxt = dd->rcd[ctxt];
1356 if (uctxt) {
1357 unsigned long *evs = dd->events +
1358 (uctxt->ctxt - dd->first_user_ctxt) *
1359 HFI1_MAX_SHARED_CTXTS;
1360 int i;
1361 /*
1362 * subctxt_cnt is 0 if not shared, so do base
1363 * separately, first, then remaining subctxt, if any
1364 */
1365 set_bit(evtbit, evs);
1366 for (i = 1; i < uctxt->subctxt_cnt; i++)
1367 set_bit(evtbit, evs + i);
1368 }
1369 }
1370 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
1371 done:
1372 return ret;
1373 }
1374
1375 /**
1376 * manage_rcvq - manage a context's receive queue
1377 * @uctxt: the context
1378 * @subctxt: the sub-context
1379 * @start_stop: action to carry out
1380 *
1381 * start_stop == 0 disables receive on the context, for use in queue
1382 * overflow conditions. start_stop == 1 re-enables, to be used to
1383 * re-init the software copy of the head register.
1384 */
1385 static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
1386 int start_stop)
1387 {
1388 struct hfi1_devdata *dd = uctxt->dd;
1389 unsigned int rcvctrl_op;
1390
1391 if (subctxt)
1392 goto bail;
1393 /* atomically clear receive enable ctxt. */
1394 if (start_stop) {
1395 /*
1396 * On enable, force in-memory copy of the tail register to
1397 * 0, so that protocol code doesn't have to worry about
1398 * whether or not the chip has yet updated the in-memory
1399 * copy or not on return from the system call. The chip
1400 * always resets it's tail register back to 0 on a
1401 * transition from disabled to enabled.
1402 */
1403 if (uctxt->rcvhdrtail_kvaddr)
1404 clear_rcvhdrtail(uctxt);
1405 rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
1406 } else {
1407 rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
1408 }
1409 hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
1410 /* always; new head should be equal to new tail; see above */
1411 bail:
1412 return 0;
1413 }
1414
1415 /*
1416 * Clear the event notifier events for this context.
1417 * The user process then performs actions appropriate to the bit having
1418 * been set, if desired, and checks again in the future.
1419 */
1420 static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
1421 unsigned long events)
1422 {
1423 int i;
1424 struct hfi1_devdata *dd = uctxt->dd;
1425 unsigned long *evs;
1426
1427 if (!dd->events)
1428 return 0;
1429
1430 evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
1431 HFI1_MAX_SHARED_CTXTS) + subctxt;
1432
1433 for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
1434 if (!test_bit(i, &events))
1435 continue;
1436 clear_bit(i, evs);
1437 }
1438 return 0;
1439 }
1440
1441 static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
1442 u16 pkey)
1443 {
1444 int ret = -ENOENT, i, intable = 0;
1445 struct hfi1_pportdata *ppd = uctxt->ppd;
1446 struct hfi1_devdata *dd = uctxt->dd;
1447
1448 if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
1449 ret = -EINVAL;
1450 goto done;
1451 }
1452
1453 for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
1454 if (pkey == ppd->pkeys[i]) {
1455 intable = 1;
1456 break;
1457 }
1458
1459 if (intable)
1460 ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
1461 done:
1462 return ret;
1463 }
1464
1465 static void user_remove(struct hfi1_devdata *dd)
1466 {
1467
1468 hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
1469 }
1470
1471 static int user_add(struct hfi1_devdata *dd)
1472 {
1473 char name[10];
1474 int ret;
1475
1476 snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
1477 ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
1478 &dd->user_cdev, &dd->user_device,
1479 true, &dd->kobj);
1480 if (ret)
1481 user_remove(dd);
1482
1483 return ret;
1484 }
1485
1486 /*
1487 * Create per-unit files in /dev
1488 */
1489 int hfi1_device_create(struct hfi1_devdata *dd)
1490 {
1491 return user_add(dd);
1492 }
1493
1494 /*
1495 * Remove per-unit files in /dev
1496 * void, core kernel returns no errors for this stuff
1497 */
1498 void hfi1_device_remove(struct hfi1_devdata *dd)
1499 {
1500 user_remove(dd);
1501 }