drivers/iommu/intel/svm.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright © 2015 Intel Corporation.
   4  *
   5  * Authors: David Woodhouse <dwmw2@infradead.org>
   6  */
   7
   8 #include <linux/mmu_notifier.h>
   9 #include <linux/sched.h>
  10 #include <linux/sched/mm.h>
  11 #include <linux/slab.h>
  12 #include <linux/rculist.h>
  13 #include <linux/pci.h>
  14 #include <linux/pci-ats.h>
  15 #include <linux/dmar.h>
  16 #include <linux/interrupt.h>
  17 #include <linux/mm_types.h>
  18 #include <linux/xarray.h>
  19 #include <asm/page.h>
  20 #include <asm/fpu/api.h>
  21
  22 #include "iommu.h"
  23 #include "pasid.h"
  24 #include "perf.h"
  25 #include "../iommu-sva.h"
  26 #include "trace.h"
  27
  28 static irqreturn_t prq_event_thread(int irq, void *d);
  29 static void intel_svm_drain_prq(struct device *dev, u32 pasid);
  30 #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
  31
  32 static DEFINE_XARRAY_ALLOC(pasid_private_array);
  33 static int pasid_private_add(ioasid_t pasid, void *priv)
  34 {
  35         return xa_alloc(&pasid_private_array, &pasid, priv,
  36                         XA_LIMIT(pasid, pasid), GFP_ATOMIC);
  37 }
  38
  39 static void pasid_private_remove(ioasid_t pasid)
  40 {
  41         xa_erase(&pasid_private_array, pasid);
  42 }
  43
  44 static void *pasid_private_find(ioasid_t pasid)
  45 {
  46         return xa_load(&pasid_private_array, pasid);
  47 }
  48
  49 static struct intel_svm_dev *
  50 svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
  51 {
  52         struct intel_svm_dev *sdev = NULL, *t;
  53
  54         rcu_read_lock();
  55         list_for_each_entry_rcu(t, &svm->devs, list) {
  56                 if (t->dev == dev) {
  57                         sdev = t;
  58                         break;
  59                 }
  60         }
  61         rcu_read_unlock();
  62
  63         return sdev;
  64 }
  65
  66 int intel_svm_enable_prq(struct intel_iommu *iommu)
  67 {
  68         struct iopf_queue *iopfq;
  69         struct page *pages;
  70         int irq, ret;
  71
  72         pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
  73         if (!pages) {
  74                 pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
  75                         iommu->name);
  76                 return -ENOMEM;
  77         }
  78         iommu->prq = page_address(pages);
  79
  80         irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
  81         if (irq <= 0) {
  82                 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
  83                        iommu->name);
  84                 ret = -EINVAL;
  85                 goto free_prq;
  86         }
  87         iommu->pr_irq = irq;
  88
  89         snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
  90                  "dmar%d-iopfq", iommu->seq_id);
  91         iopfq = iopf_queue_alloc(iommu->iopfq_name);
  92         if (!iopfq) {
  93                 pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
  94                 ret = -ENOMEM;
  95                 goto free_hwirq;
  96         }
  97         iommu->iopf_queue = iopfq;
  98
  99         snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
 100
 101         ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
 102                                    iommu->prq_name, iommu);
 103         if (ret) {
 104                 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
 105                        iommu->name);
 106                 goto free_iopfq;
 107         }
 108         dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
 109         dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
 110         dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
 111
 112         init_completion(&iommu->prq_complete);
 113
 114         return 0;
 115
 116 free_iopfq:
 117         iopf_queue_free(iommu->iopf_queue);
 118         iommu->iopf_queue = NULL;
 119 free_hwirq:
 120         dmar_free_hwirq(irq);
 121         iommu->pr_irq = 0;
 122 free_prq:
 123         free_pages((unsigned long)iommu->prq, PRQ_ORDER);
 124         iommu->prq = NULL;
 125
 126         return ret;
 127 }
 128
 129 int intel_svm_finish_prq(struct intel_iommu *iommu)
 130 {
 131         dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
 132         dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
 133         dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
 134
 135         if (iommu->pr_irq) {
 136                 free_irq(iommu->pr_irq, iommu);
 137                 dmar_free_hwirq(iommu->pr_irq);
 138                 iommu->pr_irq = 0;
 139         }
 140
 141         if (iommu->iopf_queue) {
 142                 iopf_queue_free(iommu->iopf_queue);
 143                 iommu->iopf_queue = NULL;
 144         }
 145
 146         free_pages((unsigned long)iommu->prq, PRQ_ORDER);
 147         iommu->prq = NULL;
 148
 149         return 0;
 150 }
 151
 152 void intel_svm_check(struct intel_iommu *iommu)
 153 {
 154         if (!pasid_supported(iommu))
 155                 return;
 156
 157         if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
 158             !cap_fl1gp_support(iommu->cap)) {
 159                 pr_err("%s SVM disabled, incompatible 1GB page capability\n",
 160                        iommu->name);
 161                 return;
 162         }
 163
 164         if (cpu_feature_enabled(X86_FEATURE_LA57) &&
 165             !cap_fl5lp_support(iommu->cap)) {
 166                 pr_err("%s SVM disabled, incompatible paging mode\n",
 167                        iommu->name);
 168                 return;
 169         }
 170
 171         iommu->flags |= VTD_FLAG_SVM_CAPABLE;
 172 }
 173
 174 static void __flush_svm_range_dev(struct intel_svm *svm,
 175                                   struct intel_svm_dev *sdev,
 176                                   unsigned long address,
 177                                   unsigned long pages, int ih)
 178 {
 179         struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);
 180
 181         if (WARN_ON(!pages))
 182                 return;
 183
 184         qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
 185         if (info->ats_enabled) {
 186                 qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
 187                                          svm->pasid, sdev->qdep, address,
 188                                          order_base_2(pages));
 189                 quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
 190                                           svm->pasid, sdev->qdep);
 191         }
 192 }
 193
 194 static void intel_flush_svm_range_dev(struct intel_svm *svm,
 195                                       struct intel_svm_dev *sdev,
 196                                       unsigned long address,
 197                                       unsigned long pages, int ih)
 198 {
 199         unsigned long shift = ilog2(__roundup_pow_of_two(pages));
 200         unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
 201         unsigned long start = ALIGN_DOWN(address, align);
 202         unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
 203
 204         while (start < end) {
 205                 __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
 206                 start += align;
 207         }
 208 }
 209
 210 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
 211                                 unsigned long pages, int ih)
 212 {
 213         struct intel_svm_dev *sdev;
 214
 215         rcu_read_lock();
 216         list_for_each_entry_rcu(sdev, &svm->devs, list)
 217                 intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
 218         rcu_read_unlock();
 219 }
 220
 221 /* Pages have been freed at this point */
 222 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
 223                                         struct mm_struct *mm,
 224                                         unsigned long start, unsigned long end)
 225 {
 226         struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
 227
 228         intel_flush_svm_range(svm, start,
 229                               (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
 230 }
 231
 232 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 233 {
 234         struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
 235         struct intel_svm_dev *sdev;
 236
 237         /* This might end up being called from exit_mmap(), *before* the page
 238          * tables are cleared. And __mmu_notifier_release() will delete us from
 239          * the list of notifiers so that our invalidate_range() callback doesn't
 240          * get called when the page tables are cleared. So we need to protect
 241          * against hardware accessing those page tables.
 242          *
 243          * We do it by clearing the entry in the PASID table and then flushing
 244          * the IOTLB and the PASID table caches. This might upset hardware;
 245          * perhaps we'll want to point the PASID to a dummy PGD (like the zero
 246          * page) so that we end up taking a fault that the hardware really
 247          * *has* to handle gracefully without affecting other processes.
 248          */
 249         rcu_read_lock();
 250         list_for_each_entry_rcu(sdev, &svm->devs, list)
 251                 intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
 252                                             svm->pasid, true);
 253         rcu_read_unlock();
 254
 255 }
 256
 257 static const struct mmu_notifier_ops intel_mmuops = {
 258         .release = intel_mm_release,
 259         .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
 260 };
 261
 262 static DEFINE_MUTEX(pasid_mutex);
 263
 264 static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
 265                              struct intel_svm **rsvm,
 266                              struct intel_svm_dev **rsdev)
 267 {
 268         struct intel_svm_dev *sdev = NULL;
 269         struct intel_svm *svm;
 270
 271         /* The caller should hold the pasid_mutex lock */
 272         if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
 273                 return -EINVAL;
 274
 275         if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
 276                 return -EINVAL;
 277
 278         svm = pasid_private_find(pasid);
 279         if (IS_ERR(svm))
 280                 return PTR_ERR(svm);
 281
 282         if (!svm)
 283                 goto out;
 284
 285         /*
 286          * If we found svm for the PASID, there must be at least one device
 287          * bond.
 288          */
 289         if (WARN_ON(list_empty(&svm->devs)))
 290                 return -EINVAL;
 291         sdev = svm_lookup_device_by_dev(svm, dev);
 292
 293 out:
 294         *rsvm = svm;
 295         *rsdev = sdev;
 296
 297         return 0;
 298 }
 299
 300 static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
 301                              struct mm_struct *mm)
 302 {
 303         struct device_domain_info *info = dev_iommu_priv_get(dev);
 304         struct intel_svm_dev *sdev;
 305         struct intel_svm *svm;
 306         unsigned long sflags;
 307         int ret = 0;
 308
 309         svm = pasid_private_find(mm->pasid);
 310         if (!svm) {
 311                 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
 312                 if (!svm)
 313                         return -ENOMEM;
 314
 315                 svm->pasid = mm->pasid;
 316                 svm->mm = mm;
 317                 INIT_LIST_HEAD_RCU(&svm->devs);
 318
 319                 svm->notifier.ops = &intel_mmuops;
 320                 ret = mmu_notifier_register(&svm->notifier, mm);
 321                 if (ret) {
 322                         kfree(svm);
 323                         return ret;
 324                 }
 325
 326                 ret = pasid_private_add(svm->pasid, svm);
 327                 if (ret) {
 328                         mmu_notifier_unregister(&svm->notifier, mm);
 329                         kfree(svm);
 330                         return ret;
 331                 }
 332         }
 333
 334         sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
 335         if (!sdev) {
 336                 ret = -ENOMEM;
 337                 goto free_svm;
 338         }
 339
 340         sdev->dev = dev;
 341         sdev->iommu = iommu;
 342         sdev->did = FLPT_DEFAULT_DID;
 343         sdev->sid = PCI_DEVID(info->bus, info->devfn);
 344         init_rcu_head(&sdev->rcu);
 345         if (info->ats_enabled) {
 346                 sdev->qdep = info->ats_qdep;
 347                 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
 348                         sdev->qdep = 0;
 349         }
 350
 351         /* Setup the pasid table: */
 352         sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
 353         ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
 354                                             FLPT_DEFAULT_DID, sflags);
 355         if (ret)
 356                 goto free_sdev;
 357
 358         list_add_rcu(&sdev->list, &svm->devs);
 359
 360         return 0;
 361
 362 free_sdev:
 363         kfree(sdev);
 364 free_svm:
 365         if (list_empty(&svm->devs)) {
 366                 mmu_notifier_unregister(&svm->notifier, mm);
 367                 pasid_private_remove(mm->pasid);
 368                 kfree(svm);
 369         }
 370
 371         return ret;
 372 }
 373
 374 /* Caller must hold pasid_mutex */
 375 static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
 376 {
 377         struct intel_svm_dev *sdev;
 378         struct intel_iommu *iommu;
 379         struct intel_svm *svm;
 380         struct mm_struct *mm;
 381         int ret = -EINVAL;
 382
 383         iommu = device_to_iommu(dev, NULL, NULL);
 384         if (!iommu)
 385                 goto out;
 386
 387         ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
 388         if (ret)
 389                 goto out;
 390         mm = svm->mm;
 391
 392         if (sdev) {
 393                 list_del_rcu(&sdev->list);
 394                 /*
 395                  * Flush the PASID cache and IOTLB for this device.
 396                  * Note that we do depend on the hardware *not* using
 397                  * the PASID any more. Just as we depend on other
 398                  * devices never using PASIDs that they have no right
 399                  * to use. We have a *shared* PASID table, because it's
 400                  * large and has to be physically contiguous. So it's
 401                  * hard to be as defensive as we might like.
 402                  */
 403                 intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false);
 404                 intel_svm_drain_prq(dev, svm->pasid);
 405                 kfree_rcu(sdev, rcu);
 406
 407                 if (list_empty(&svm->devs)) {
 408                         if (svm->notifier.ops)
 409                                 mmu_notifier_unregister(&svm->notifier, mm);
 410                         pasid_private_remove(svm->pasid);
 411                         /*
 412                          * We mandate that no page faults may be outstanding
 413                          * for the PASID when intel_svm_unbind_mm() is called.
 414                          * If that is not obeyed, subtle errors will happen.
 415                          * Let's make them less subtle...
 416                          */
 417                         memset(svm, 0x6b, sizeof(*svm));
 418                         kfree(svm);
 419                 }
 420         }
 421 out:
 422         return ret;
 423 }
 424
 425 /* Page request queue descriptor */
 426 struct page_req_dsc {
 427         union {
 428                 struct {
 429                         u64 type:8;
 430                         u64 pasid_present:1;
 431                         u64 priv_data_present:1;
 432                         u64 rsvd:6;
 433                         u64 rid:16;
 434                         u64 pasid:20;
 435                         u64 exe_req:1;
 436                         u64 pm_req:1;
 437                         u64 rsvd2:10;
 438                 };
 439                 u64 qw_0;
 440         };
 441         union {
 442                 struct {
 443                         u64 rd_req:1;
 444                         u64 wr_req:1;
 445                         u64 lpig:1;
 446                         u64 prg_index:9;
 447                         u64 addr:52;
 448                 };
 449                 u64 qw_1;
 450         };
 451         u64 priv_data[2];
 452 };
 453
 454 static bool is_canonical_address(u64 addr)
 455 {
 456         int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
 457         long saddr = (long) addr;
 458
 459         return (((saddr << shift) >> shift) == saddr);
 460 }
 461
 462 /**
 463  * intel_svm_drain_prq - Drain page requests and responses for a pasid
 464  * @dev: target device
 465  * @pasid: pasid for draining
 466  *
 467  * Drain all pending page requests and responses related to @pasid in both
 468  * software and hardware. This is supposed to be called after the device
 469  * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 470  * and DevTLB have been invalidated.
 471  *
 472  * It waits until all pending page requests for @pasid in the page fault
 473  * queue are completed by the prq handling thread. Then follow the steps
 474  * described in VT-d spec CH7.10 to drain all page requests and page
 475  * responses pending in the hardware.
 476  */
 477 static void intel_svm_drain_prq(struct device *dev, u32 pasid)
 478 {
 479         struct device_domain_info *info;
 480         struct dmar_domain *domain;
 481         struct intel_iommu *iommu;
 482         struct qi_desc desc[3];
 483         struct pci_dev *pdev;
 484         int head, tail;
 485         u16 sid, did;
 486         int qdep;
 487
 488         info = dev_iommu_priv_get(dev);
 489         if (WARN_ON(!info || !dev_is_pci(dev)))
 490                 return;
 491
 492         if (!info->pri_enabled)
 493                 return;
 494
 495         iommu = info->iommu;
 496         domain = info->domain;
 497         pdev = to_pci_dev(dev);
 498         sid = PCI_DEVID(info->bus, info->devfn);
 499         did = domain_id_iommu(domain, iommu);
 500         qdep = pci_ats_queue_depth(pdev);
 501
 502         /*
 503          * Check and wait until all pending page requests in the queue are
 504          * handled by the prq handling thread.
 505          */
 506 prq_retry:
 507         reinit_completion(&iommu->prq_complete);
 508         tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
 509         head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
 510         while (head != tail) {
 511                 struct page_req_dsc *req;
 512
 513                 req = &iommu->prq[head / sizeof(*req)];
 514                 if (!req->pasid_present || req->pasid != pasid) {
 515                         head = (head + sizeof(*req)) & PRQ_RING_MASK;
 516                         continue;
 517                 }
 518
 519                 wait_for_completion(&iommu->prq_complete);
 520                 goto prq_retry;
 521         }
 522
 523         /*
 524          * A work in IO page fault workqueue may try to lock pasid_mutex now.
 525          * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
 526          * all works in the workqueue to finish may cause deadlock.
 527          *
 528          * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
 529          * Unlock it to allow the works to be handled while waiting for
 530          * them to finish.
 531          */
 532         lockdep_assert_held(&pasid_mutex);
 533         mutex_unlock(&pasid_mutex);
 534         iopf_queue_flush_dev(dev);
 535         mutex_lock(&pasid_mutex);
 536
 537         /*
 538          * Perform steps described in VT-d spec CH7.10 to drain page
 539          * requests and responses in hardware.
 540          */
 541         memset(desc, 0, sizeof(desc));
 542         desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
 543                         QI_IWD_FENCE |
 544                         QI_IWD_TYPE;
 545         desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
 546                         QI_EIOTLB_DID(did) |
 547                         QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
 548                         QI_EIOTLB_TYPE;
 549         desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
 550                         QI_DEV_EIOTLB_SID(sid) |
 551                         QI_DEV_EIOTLB_QDEP(qdep) |
 552                         QI_DEIOTLB_TYPE |
 553                         QI_DEV_IOTLB_PFSID(info->pfsid);
 554 qi_retry:
 555         reinit_completion(&iommu->prq_complete);
 556         qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
 557         if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
 558                 wait_for_completion(&iommu->prq_complete);
 559                 goto qi_retry;
 560         }
 561 }
 562
 563 static int prq_to_iommu_prot(struct page_req_dsc *req)
 564 {
 565         int prot = 0;
 566
 567         if (req->rd_req)
 568                 prot |= IOMMU_FAULT_PERM_READ;
 569         if (req->wr_req)
 570                 prot |= IOMMU_FAULT_PERM_WRITE;
 571         if (req->exe_req)
 572                 prot |= IOMMU_FAULT_PERM_EXEC;
 573         if (req->pm_req)
 574                 prot |= IOMMU_FAULT_PERM_PRIV;
 575
 576         return prot;
 577 }
 578
 579 static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
 580                                 struct page_req_dsc *desc)
 581 {
 582         struct iommu_fault_event event;
 583
 584         if (!dev || !dev_is_pci(dev))
 585                 return -ENODEV;
 586
 587         /* Fill in event data for device specific processing */
 588         memset(&event, 0, sizeof(struct iommu_fault_event));
 589         event.fault.type = IOMMU_FAULT_PAGE_REQ;
 590         event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
 591         event.fault.prm.pasid = desc->pasid;
 592         event.fault.prm.grpid = desc->prg_index;
 593         event.fault.prm.perm = prq_to_iommu_prot(desc);
 594
 595         if (desc->lpig)
 596                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
 597         if (desc->pasid_present) {
 598                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
 599                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
 600         }
 601         if (desc->priv_data_present) {
 602                 /*
 603                  * Set last page in group bit if private data is present,
 604                  * page response is required as it does for LPIG.
 605                  * iommu_report_device_fault() doesn't understand this vendor
 606                  * specific requirement thus we set last_page as a workaround.
 607                  */
 608                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
 609                 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
 610                 event.fault.prm.private_data[0] = desc->priv_data[0];
 611                 event.fault.prm.private_data[1] = desc->priv_data[1];
 612         } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
 613                 /*
 614                  * If the private data fields are not used by hardware, use it
 615                  * to monitor the prq handle latency.
 616                  */
 617                 event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
 618         }
 619
 620         return iommu_report_device_fault(dev, &event);
 621 }
 622
 623 static void handle_bad_prq_event(struct intel_iommu *iommu,
 624                                  struct page_req_dsc *req, int result)
 625 {
 626         struct qi_desc desc;
 627
 628         pr_err("%s: Invalid page request: %08llx %08llx\n",
 629                iommu->name, ((unsigned long long *)req)[0],
 630                ((unsigned long long *)req)[1]);
 631
 632         /*
 633          * Per VT-d spec. v3.0 ch7.7, system software must
 634          * respond with page group response if private data
 635          * is present (PDP) or last page in group (LPIG) bit
 636          * is set. This is an additional VT-d feature beyond
 637          * PCI ATS spec.
 638          */
 639         if (!req->lpig && !req->priv_data_present)
 640                 return;
 641
 642         desc.qw0 = QI_PGRP_PASID(req->pasid) |
 643                         QI_PGRP_DID(req->rid) |
 644                         QI_PGRP_PASID_P(req->pasid_present) |
 645                         QI_PGRP_PDP(req->priv_data_present) |
 646                         QI_PGRP_RESP_CODE(result) |
 647                         QI_PGRP_RESP_TYPE;
 648         desc.qw1 = QI_PGRP_IDX(req->prg_index) |
 649                         QI_PGRP_LPIG(req->lpig);
 650
 651         if (req->priv_data_present) {
 652                 desc.qw2 = req->priv_data[0];
 653                 desc.qw3 = req->priv_data[1];
 654         } else {
 655                 desc.qw2 = 0;
 656                 desc.qw3 = 0;
 657         }
 658
 659         qi_submit_sync(iommu, &desc, 1, 0);
 660 }
 661
 662 static irqreturn_t prq_event_thread(int irq, void *d)
 663 {
 664         struct intel_iommu *iommu = d;
 665         struct page_req_dsc *req;
 666         int head, tail, handled;
 667         struct pci_dev *pdev;
 668         u64 address;
 669
 670         /*
 671          * Clear PPR bit before reading head/tail registers, to ensure that
 672          * we get a new interrupt if needed.
 673          */
 674         writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
 675
 676         tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
 677         head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
 678         handled = (head != tail);
 679         while (head != tail) {
 680                 req = &iommu->prq[head / sizeof(*req)];
 681                 address = (u64)req->addr << VTD_PAGE_SHIFT;
 682
 683                 if (unlikely(!req->pasid_present)) {
 684                         pr_err("IOMMU: %s: Page request without PASID\n",
 685                                iommu->name);
 686 bad_req:
 687                         handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
 688                         goto prq_advance;
 689                 }
 690
 691                 if (unlikely(!is_canonical_address(address))) {
 692                         pr_err("IOMMU: %s: Address is not canonical\n",
 693                                iommu->name);
 694                         goto bad_req;
 695                 }
 696
 697                 if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
 698                         pr_err("IOMMU: %s: Page request in Privilege Mode\n",
 699                                iommu->name);
 700                         goto bad_req;
 701                 }
 702
 703                 if (unlikely(req->exe_req && req->rd_req)) {
 704                         pr_err("IOMMU: %s: Execution request not supported\n",
 705                                iommu->name);
 706                         goto bad_req;
 707                 }
 708
 709                 /* Drop Stop Marker message. No need for a response. */
 710                 if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
 711                         goto prq_advance;
 712
 713                 pdev = pci_get_domain_bus_and_slot(iommu->segment,
 714                                                    PCI_BUS_NUM(req->rid),
 715                                                    req->rid & 0xff);
 716                 /*
 717                  * If prq is to be handled outside iommu driver via receiver of
 718                  * the fault notifiers, we skip the page response here.
 719                  */
 720                 if (!pdev)
 721                         goto bad_req;
 722
 723                 if (intel_svm_prq_report(iommu, &pdev->dev, req))
 724                         handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
 725                 else
 726                         trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
 727                                          req->priv_data[0], req->priv_data[1],
 728                                          iommu->prq_seq_number++);
 729                 pci_dev_put(pdev);
 730 prq_advance:
 731                 head = (head + sizeof(*req)) & PRQ_RING_MASK;
 732         }
 733
 734         dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
 735
 736         /*
 737          * Clear the page request overflow bit and wake up all threads that
 738          * are waiting for the completion of this handling.
 739          */
 740         if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
 741                 pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
 742                                     iommu->name);
 743                 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
 744                 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
 745                 if (head == tail) {
 746                         iopf_queue_discard_partial(iommu->iopf_queue);
 747                         writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
 748                         pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
 749                                             iommu->name);
 750                 }
 751         }
 752
 753         if (!completion_done(&iommu->prq_complete))
 754                 complete(&iommu->prq_complete);
 755
 756         return IRQ_RETVAL(handled);
 757 }
 758
 759 int intel_svm_page_response(struct device *dev,
 760                             struct iommu_fault_event *evt,
 761                             struct iommu_page_response *msg)
 762 {
 763         struct iommu_fault_page_request *prm;
 764         struct intel_iommu *iommu;
 765         bool private_present;
 766         bool pasid_present;
 767         bool last_page;
 768         u8 bus, devfn;
 769         int ret = 0;
 770         u16 sid;
 771
 772         if (!dev || !dev_is_pci(dev))
 773                 return -ENODEV;
 774
 775         iommu = device_to_iommu(dev, &bus, &devfn);
 776         if (!iommu)
 777                 return -ENODEV;
 778
 779         if (!msg || !evt)
 780                 return -EINVAL;
 781
 782         prm = &evt->fault.prm;
 783         sid = PCI_DEVID(bus, devfn);
 784         pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
 785         private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
 786         last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
 787
 788         if (!pasid_present) {
 789                 ret = -EINVAL;
 790                 goto out;
 791         }
 792
 793         if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
 794                 ret = -EINVAL;
 795                 goto out;
 796         }
 797
 798         /*
 799          * Per VT-d spec. v3.0 ch7.7, system software must respond
 800          * with page group response if private data is present (PDP)
 801          * or last page in group (LPIG) bit is set. This is an
 802          * additional VT-d requirement beyond PCI ATS spec.
 803          */
 804         if (last_page || private_present) {
 805                 struct qi_desc desc;
 806
 807                 desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
 808                                 QI_PGRP_PASID_P(pasid_present) |
 809                                 QI_PGRP_PDP(private_present) |
 810                                 QI_PGRP_RESP_CODE(msg->code) |
 811                                 QI_PGRP_RESP_TYPE;
 812                 desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
 813                 desc.qw2 = 0;
 814                 desc.qw3 = 0;
 815
 816                 if (private_present) {
 817                         desc.qw2 = prm->private_data[0];
 818                         desc.qw3 = prm->private_data[1];
 819                 } else if (prm->private_data[0]) {
 820                         dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
 821                                 ktime_to_ns(ktime_get()) - prm->private_data[0]);
 822                 }
 823
 824                 qi_submit_sync(iommu, &desc, 1, 0);
 825         }
 826 out:
 827         return ret;
 828 }
 829
 830 void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
 831 {
 832         mutex_lock(&pasid_mutex);
 833         intel_svm_unbind_mm(dev, pasid);
 834         mutex_unlock(&pasid_mutex);
 835 }
 836
 837 static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
 838                                    struct device *dev, ioasid_t pasid)
 839 {
 840         struct device_domain_info *info = dev_iommu_priv_get(dev);
 841         struct intel_iommu *iommu = info->iommu;
 842         struct mm_struct *mm = domain->mm;
 843         int ret;
 844
 845         mutex_lock(&pasid_mutex);
 846         ret = intel_svm_bind_mm(iommu, dev, mm);
 847         mutex_unlock(&pasid_mutex);
 848
 849         return ret;
 850 }
 851
 /* .free callback: release the dmar_domain that embeds @domain. */
 static void intel_svm_domain_free(struct iommu_domain *domain)
 {
         struct dmar_domain *dmar = to_dmar_domain(domain);

         kfree(dmar);
 }
 856
 857 static const struct iommu_domain_ops intel_svm_domain_ops = {
 858         .set_dev_pasid          = intel_svm_set_dev_pasid,
 859         .free                   = intel_svm_domain_free
 860 };
 861
 862 struct iommu_domain *intel_svm_domain_alloc(void)
 863 {
 864         struct dmar_domain *domain;
 865
 866         domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 867         if (!domain)
 868                 return NULL;
 869         domain->domain.ops = &intel_svm_domain_ops;
 870
 871         return &domain->domain;
 872 }