long index,
                        unsigned long *hpa,
                        enum dma_data_direction *direction);
+
+       __be64 *(*useraddrptr)(struct iommu_table *tbl, long index);
 #endif
        void (*clear)(struct iommu_table *tbl,
                        long index, long npages);
 };
 
+/*
+ * Resolve the userspace-view entry for @entry through the table's
+ * useraddrptr() callback; the callback may return NULL when no
+ * userspace copy of the TCE table has been allocated.
+ */
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
-               ((tbl)->it_userspace ? \
-                       &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
-                       NULL)
+               ((tbl)->it_ops->useraddrptr((tbl), (entry)))
 
 /* Pure 2^n version of get_order */
 static inline __attribute_const__
 
                /* it_userspace allocation might be delayed */
                return H_TOO_HARD;
 
-       pua = (void *) vmalloc_to_phys(pua);
-       if (WARN_ON_ONCE_RM(!pua))
-               return H_HARDWARE;
-
        mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
        if (!mem)
                return H_TOO_HARD;
        if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
                return H_HARDWARE;
 
-       pua = (void *) vmalloc_to_phys(pua);
-       if (WARN_ON_ONCE_RM(!pua))
-               return H_HARDWARE;
-
        if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
                return H_CLOSED;
 
 
        tbl->it_type = TCE_PCI;
 }
 
-static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
+static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx)
 {
-       __be64 *tmp = ((__be64 *)tbl->it_base);
+       __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
        int  level = tbl->it_indirect_levels;
        const long shift = ilog2(tbl->it_level_size);
        unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
                        ((rpn + i) << tbl->it_page_shift);
                unsigned long idx = index - tbl->it_offset + i;
 
-               *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
+               *(pnv_tce(tbl, false, idx)) = cpu_to_be64(newtce);
        }
 
        return 0;
        if (newtce & TCE_PCI_WRITE)
                newtce |= TCE_PCI_READ;
 
-       oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
+       oldtce = be64_to_cpu(xchg(pnv_tce(tbl, false, idx),
+                                 cpu_to_be64(newtce)));
        *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
        *direction = iommu_tce_direction(oldtce);
 
        return 0;
 }
+
+/*
+ * Return a pointer into the userspace view of the TCE table for @index.
+ * Walks the same indirect-level structure as the HW table by calling
+ * pnv_tce() with user == true (which selects tbl->it_userspace as the
+ * base instead of tbl->it_base).  Returns NULL, with a one-time warning,
+ * if no userspace copy was allocated for this table.
+ */
+__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index)
+{
+       if (WARN_ON_ONCE(!tbl->it_userspace))
+               return NULL;
+
+       return pnv_tce(tbl, true, index - tbl->it_offset);
+}
 #endif
 
 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
        for (i = 0; i < npages; i++) {
                unsigned long idx = index - tbl->it_offset + i;
 
-               *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
+               *(pnv_tce(tbl, false, idx)) = cpu_to_be64(0);
        }
 }
 
 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
 {
-       return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
+       /* user == false: read from the kernel/HW view (it_base) */
+       __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset);
+
+       return be64_to_cpu(*ptce);
 }
 
 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
 
        pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
                        tbl->it_indirect_levels);
+       if (tbl->it_userspace) {
+               pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
+                               tbl->it_indirect_levels);
+       }
 }
 
 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
 
 long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
                __u32 page_shift, __u64 window_size, __u32 levels,
-               struct iommu_table *tbl)
+               bool alloc_userspace_copy, struct iommu_table *tbl)
 {
-       void *addr;
+       void *addr, *uas = NULL;
        unsigned long offset = 0, level_shift, total_allocated = 0;
+       unsigned long total_allocated_uas = 0;
        const unsigned int window_shift = ilog2(window_size);
        unsigned int entries_shift = window_shift - page_shift;
        unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
         * we did not allocate as much as we wanted,
         * release partially allocated table.
         */
-       if (offset < tce_table_size) {
-               pnv_pci_ioda2_table_do_free_pages(addr,
-                               1ULL << (level_shift - 3), levels - 1);
-               return -ENOMEM;
+       if (offset < tce_table_size)
+               goto free_tces_exit;
+
+       /* Allocate userspace view of the TCE table */
+       if (alloc_userspace_copy) {
+               offset = 0;
+               uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+                               levels, tce_table_size, &offset,
+                               &total_allocated_uas);
+               if (!uas)
+                       goto free_tces_exit;
+               if (offset < tce_table_size ||
+                               total_allocated_uas != total_allocated)
+                       goto free_uas_exit;
        }
 
        /* Setup linux iommu table */
        tbl->it_level_size = 1ULL << (level_shift - 3);
        tbl->it_indirect_levels = levels - 1;
        tbl->it_allocated_size = total_allocated;
+       tbl->it_userspace = uas;
 
-       pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
-                       window_size, tce_table_size, bus_offset);
+       pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d\n",
+                       window_size, tce_table_size, bus_offset, tbl->it_base,
+                       tbl->it_userspace, levels);
 
        return 0;
+
+free_uas_exit:
+       pnv_pci_ioda2_table_do_free_pages(uas,
+                       1ULL << (level_shift - 3), levels - 1);
+free_tces_exit:
+       pnv_pci_ioda2_table_do_free_pages(addr,
+                       1ULL << (level_shift - 3), levels - 1);
+
+       return -ENOMEM;
 }
 
 static void pnv_iommu_table_group_link_free(struct rcu_head *head)
 
 #ifdef CONFIG_IOMMU_API
        .exchange = pnv_ioda1_tce_xchg,
        .exchange_rm = pnv_ioda1_tce_xchg_rm,
+       .useraddrptr = pnv_tce_useraddrptr,
 #endif
        .clear = pnv_ioda1_tce_free,
        .get = pnv_tce_get,
 #ifdef CONFIG_IOMMU_API
        .exchange = pnv_ioda2_tce_xchg,
        .exchange_rm = pnv_ioda2_tce_xchg_rm,
+       .useraddrptr = pnv_tce_useraddrptr,
 #endif
        .clear = pnv_ioda2_tce_free,
        .get = pnv_tce_get,
 
 static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
                int num, __u32 page_shift, __u64 window_size, __u32 levels,
-               struct iommu_table **ptbl)
+               bool alloc_userspace_copy, struct iommu_table **ptbl)
 {
        struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
                        table_group);
 
        ret = pnv_pci_ioda2_table_alloc_pages(nid,
                        bus_offset, page_shift, window_size,
-                       levels, tbl);
+                       levels, alloc_userspace_copy, tbl);
        if (ret) {
                iommu_tce_table_put(tbl);
                return ret;
        rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
                        IOMMU_PAGE_SHIFT_4K,
                        window_size,
-                       POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
+                       POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
        if (rc) {
                pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
                                rc);
                                tce_table_size, direct_table_size);
        }
 
-       return bytes;
+       return bytes + bytes; /* one for HW table, one for userspace copy */
+}
+
+/*
+ * iommu_table_group_ops::create_table hook (installed in
+ * pnv_pci_ioda2_ops / pnv_pci_ioda2_npu_ops below): wraps
+ * pnv_pci_ioda2_create_table() with alloc_userspace_copy == true so a
+ * userspace view of the TCE table is allocated alongside the HW table.
+ */
+static long pnv_pci_ioda2_create_table_userspace(
+               struct iommu_table_group *table_group,
+               int num, __u32 page_shift, __u64 window_size, __u32 levels,
+               struct iommu_table **ptbl)
+{
+       return pnv_pci_ioda2_create_table(table_group,
+                       num, page_shift, window_size, levels, true, ptbl);
+}
 
 static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
 
 static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
        .get_table_size = pnv_pci_ioda2_get_table_size,
-       .create_table = pnv_pci_ioda2_create_table,
+       .create_table = pnv_pci_ioda2_create_table_userspace,
        .set_window = pnv_pci_ioda2_set_window,
        .unset_window = pnv_pci_ioda2_unset_window,
        .take_ownership = pnv_ioda2_take_ownership,
 
 static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
        .get_table_size = pnv_pci_ioda2_get_table_size,
-       .create_table = pnv_pci_ioda2_create_table,
+       .create_table = pnv_pci_ioda2_create_table_userspace,
        .set_window = pnv_pci_ioda2_npu_set_window,
        .unset_window = pnv_pci_ioda2_npu_unset_window,
        .take_ownership = pnv_ioda2_npu_take_ownership,
 
 extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
 extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
                unsigned long *hpa, enum dma_data_direction *direction);
+extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index);
 extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
 
 extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
                __u32 page_shift, __u64 window_size, __u32 levels,
-               struct iommu_table *tbl);
+               bool alloc_userspace_copy, struct iommu_table *tbl);
 extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
 
 extern long pnv_pci_link_table_and_group(int node, int num,
 
        return 0;
 }
 
-static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
-               struct mm_struct *mm)
-{
-       unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
-                       tbl->it_size, PAGE_SIZE);
-       unsigned long *uas;
-       long ret;
-
-       BUG_ON(tbl->it_userspace);
-
-       ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
-       if (ret)
-               return ret;
-
-       uas = vzalloc(cb);
-       if (!uas) {
-               decrement_locked_vm(mm, cb >> PAGE_SHIFT);
-               return -ENOMEM;
-       }
-       tbl->it_userspace = (__be64 *) uas;
-
-       return 0;
-}
-
-static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
-               struct mm_struct *mm)
-{
-       unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
-                       tbl->it_size, PAGE_SIZE);
-
-       if (!tbl->it_userspace)
-               return;
-
-       vfree(tbl->it_userspace);
-       tbl->it_userspace = NULL;
-       decrement_locked_vm(mm, cb >> PAGE_SHIFT);
-}
-
 static bool tce_page_is_contained(struct page *page, unsigned page_shift)
 {
        /*
        unsigned long hpa;
        enum dma_data_direction dirtmp;
 
-       if (!tbl->it_userspace) {
-               ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
-               if (ret)
-                       return ret;
-       }
-
        for (i = 0; i < pages; ++i) {
                struct mm_iommu_table_group_mem_t *mem = NULL;
                __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);
 {
        unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
 
-       tce_iommu_userspace_view_free(tbl, container->mm);
        iommu_tce_table_put(tbl);
        decrement_locked_vm(container->mm, pages);
 }
                        continue;
 
                tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
-               tce_iommu_userspace_view_free(tbl, container->mm);
                if (tbl->it_map)
                        iommu_release_ownership(tbl);