NUM_EXPECTED_SYNCS(2));
}
+/*
+ * Verify an invalidation array against expected values: the live entry count,
+ * the trash entry count, and per-entry {id, users, ssid} triplets given as
+ * three parallel arrays indexed 0..num_invs-1.
+ */
+static void arm_smmu_v3_invs_test_verify(struct kunit *test,
+ struct arm_smmu_invs *invs,
+ int num_invs, const int num_trashes,
+ const int *ids, const int *users,
+ const int *ssids)
+{
+ KUNIT_EXPECT_EQ(test, invs->num_invs, num_invs);
+ KUNIT_EXPECT_EQ(test, invs->num_trashes, num_trashes);
+ while (num_invs--) {
+ KUNIT_EXPECT_EQ(test, invs->inv[num_invs].id, ids[num_invs]);
+ KUNIT_EXPECT_EQ(test, READ_ONCE(invs->inv[num_invs].users),
+ users[num_invs]);
+ KUNIT_EXPECT_EQ(test, invs->inv[num_invs].ssid, ssids[num_invs]);
+ }
+}
+
+/*
+ * Fixed input arrays for the merge/unref/purge tests below. Each array is
+ * pre-sorted in the order that arm_smmu_invs_merge()/unref() require
+ * (by type, then id, then ssid for ATS entries).
+ */
+static struct arm_smmu_invs invs1 = {
+ .num_invs = 3,
+ .inv = { { .type = INV_TYPE_S2_VMID, .id = 1, },
+ { .type = INV_TYPE_S2_VMID_S1_CLEAR, .id = 1, },
+ { .type = INV_TYPE_ATS, .id = 3, }, },
+};
+
+static struct arm_smmu_invs invs2 = {
+ .num_invs = 3,
+ .inv = { { .type = INV_TYPE_S2_VMID, .id = 1, }, /* duplicated */
+ { .type = INV_TYPE_ATS, .id = 4, },
+ { .type = INV_TYPE_ATS, .id = 5, }, },
+};
+
+static struct arm_smmu_invs invs3 = {
+ .num_invs = 3,
+ .inv = { { .type = INV_TYPE_S2_VMID, .id = 1, }, /* duplicated */
+ { .type = INV_TYPE_ATS, .id = 5, }, /* recover a trash */
+ { .type = INV_TYPE_ATS, .id = 6, }, },
+};
+
+static struct arm_smmu_invs invs4 = {
+ .num_invs = 3,
+ .inv = { { .type = INV_TYPE_ATS, .id = 10, .ssid = 1 },
+ { .type = INV_TYPE_ATS, .id = 10, .ssid = 3 },
+ { .type = INV_TYPE_ATS, .id = 12, .ssid = 1 }, },
+};
+
+static struct arm_smmu_invs invs5 = {
+ .num_invs = 3,
+ .inv = { { .type = INV_TYPE_ATS, .id = 10, .ssid = 2 },
+ { .type = INV_TYPE_ATS, .id = 10, .ssid = 3 }, /* duplicate */
+ { .type = INV_TYPE_ATS, .id = 12, .ssid = 2 }, },
+};
+
+/*
+ * End-to-end exercise of arm_smmu_invs_merge()/unref()/purge(): each resultsN
+ * table holds the expected {ids, users, ssids} rows after the N-th step.
+ *
+ * Fix over the original: check every allocation result before dereferencing.
+ * arm_smmu_invs_alloc() and arm_smmu_invs_purge() return NULL on failure, and
+ * arm_smmu_invs_merge() returns ERR_PTR(-ENOMEM); the original test would
+ * dereference those in arm_smmu_v3_invs_test_verify() and crash the suite
+ * instead of failing cleanly.
+ */
+static void arm_smmu_v3_invs_test(struct kunit *test)
+{
+ const int results1[3][3] = { { 1, 1, 3, }, { 1, 1, 1, }, { 0, 0, 0, } };
+ const int results2[3][5] = { { 1, 1, 3, 4, 5, }, { 2, 1, 1, 1, 1, }, { 0, 0, 0, 0, 0, } };
+ const int results3[3][3] = { { 1, 1, 3, }, { 1, 1, 1, }, { 0, 0, 0, } };
+ const int results4[3][5] = { { 1, 1, 3, 5, 6, }, { 2, 1, 1, 1, 1, }, { 0, 0, 0, 0, 0, } };
+ const int results5[3][5] = { { 1, 1, 3, 5, 6, }, { 1, 0, 0, 1, 1, }, { 0, 0, 0, 0, 0, } };
+ const int results6[3][3] = { { 1, 5, 6, }, { 1, 1, 1, }, { 0, 0, 0, } };
+ const int results7[3][3] = { { 10, 10, 12, }, { 1, 1, 1, }, { 1, 3, 1, } };
+ const int results8[3][5] = { { 10, 10, 10, 12, 12, }, { 1, 1, 2, 1, 1, }, { 1, 2, 3, 1, 2, } };
+ const int results9[3][4] = { { 10, 10, 10, 12, }, { 1, 0, 1, 1, }, { 1, 2, 3, 1, } };
+ const int results10[3][3] = { { 10, 10, 12, }, { 1, 1, 1, }, { 1, 3, 1, } };
+ struct arm_smmu_invs *test_a, *test_b;
+
+ /* New array */
+ test_a = arm_smmu_invs_alloc(0);
+ KUNIT_ASSERT_NOT_NULL(test, test_a);
+ KUNIT_EXPECT_EQ(test, test_a->num_invs, 0);
+
+ /* Test1: merge invs1 (new array) */
+ test_b = arm_smmu_invs_merge(test_a, &invs1);
+ kfree(test_a);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_b);
+ arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results1[0]), 0,
+ results1[0], results1[1], results1[2]);
+
+ /* Test2: merge invs2 (new array) */
+ test_a = arm_smmu_invs_merge(test_b, &invs2);
+ kfree(test_b);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_a);
+ arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results2[0]), 0,
+ results2[0], results2[1], results2[2]);
+
+ /* Test3: unref invs2 (same array) */
+ arm_smmu_invs_unref(test_a, &invs2);
+ arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results3[0]), 0,
+ results3[0], results3[1], results3[2]);
+
+ /* Test4: merge invs3 (new array) */
+ test_b = arm_smmu_invs_merge(test_a, &invs3);
+ kfree(test_a);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_b);
+ arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results4[0]), 0,
+ results4[0], results4[1], results4[2]);
+
+ /* Test5: unref invs1 (same array) */
+ arm_smmu_invs_unref(test_b, &invs1);
+ arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results5[0]), 2,
+ results5[0], results5[1], results5[2]);
+
+ /* Test6: purge test_b (new array); 2 trash entries exist, so non-NULL */
+ test_a = arm_smmu_invs_purge(test_b);
+ kfree(test_b);
+ KUNIT_ASSERT_NOT_NULL(test, test_a);
+ arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results6[0]), 0,
+ results6[0], results6[1], results6[2]);
+
+ /* Test7: unref invs3 (same array) */
+ arm_smmu_invs_unref(test_a, &invs3);
+ KUNIT_EXPECT_EQ(test, test_a->num_invs, 0);
+ KUNIT_EXPECT_EQ(test, test_a->num_trashes, 0);
+
+ /* Test8: merge invs4 (new array) */
+ test_b = arm_smmu_invs_merge(test_a, &invs4);
+ kfree(test_a);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_b);
+ arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results7[0]), 0,
+ results7[0], results7[1], results7[2]);
+
+ /* Test9: merge invs5 (new array) */
+ test_a = arm_smmu_invs_merge(test_b, &invs5);
+ kfree(test_b);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_a);
+ arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results8[0]), 0,
+ results8[0], results8[1], results8[2]);
+
+ /* Test10: unref invs5 (same array) */
+ arm_smmu_invs_unref(test_a, &invs5);
+ arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results9[0]), 1,
+ results9[0], results9[1], results9[2]);
+
+ /* Test11: purge test_a (new array); 1 trash entry exists, so non-NULL */
+ test_b = arm_smmu_invs_purge(test_a);
+ kfree(test_a);
+ KUNIT_ASSERT_NOT_NULL(test, test_b);
+ arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results10[0]), 0,
+ results10[0], results10[1], results10[2]);
+
+ kfree(test_b);
+}
+
static struct kunit_case arm_smmu_v3_test_cases[] = {
KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_abort),
KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_bypass),
KUNIT_CASE(arm_smmu_v3_write_ste_test_nested_s1bypass_to_s1dssbypass),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release),
+ /* exercises arm_smmu_invs_merge()/unref()/purge() */
+ KUNIT_CASE(arm_smmu_v3_invs_test),
{},
};
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
+#include <linux/sort.h>
#include <linux/string_choices.h>
#include <kunit/visibility.h>
#include <uapi/linux/iommufd.h>
*/
}
+/* Invalidation array manipulation functions */
+
+/*
+ * Return the first live entry of @invs at or after index @next, storing its
+ * index in *@idx. Trash entries (users == 0) are skipped transparently.
+ * Returns NULL (with *idx >= num_invs) once the end of the array is reached.
+ */
+static inline struct arm_smmu_inv *
+arm_smmu_invs_iter_next(struct arm_smmu_invs *invs, size_t next, size_t *idx)
+{
+ while (true) {
+ if (next >= invs->num_invs) {
+ *idx = next;
+ return NULL;
+ }
+ if (!READ_ONCE(invs->inv[next].users)) {
+ next++;
+ continue;
+ }
+ *idx = next;
+ return &invs->inv[next];
+ }
+}
+
+/**
+ * arm_smmu_invs_for_each_entry - Iterate over all non-trash entries in invs
+ * @invs: the base invalidation array
+ * @idx: a stack variable of 'size_t', to store the array index
+ * @cur: a stack variable of 'struct arm_smmu_inv *', the current entry
+ *
+ * Entries whose users counter is zero (trash) are skipped by the iterator.
+ */
+#define arm_smmu_invs_for_each_entry(invs, idx, cur) \
+ for (cur = arm_smmu_invs_iter_next(invs, 0, &(idx)); cur; \
+ cur = arm_smmu_invs_iter_next(invs, idx + 1, &(idx)))
+
+/*
+ * Total order over invalidation entries: by SMMU instance pointer, then type,
+ * then id, and — for the ATS types only — by ssid. Returns -1, 0 or 1 like
+ * cmp_int().
+ */
+static int arm_smmu_inv_cmp(const struct arm_smmu_inv *inv_l,
+ const struct arm_smmu_inv *inv_r)
+{
+ if (inv_l->smmu != inv_r->smmu)
+ return cmp_int((uintptr_t)inv_l->smmu, (uintptr_t)inv_r->smmu);
+ if (inv_l->type != inv_r->type)
+ return cmp_int(inv_l->type, inv_r->type);
+ if (inv_l->id != inv_r->id)
+ return cmp_int(inv_l->id, inv_r->id);
+ if (arm_smmu_inv_is_ats(inv_l))
+ return cmp_int(inv_l->ssid, inv_r->ssid);
+ return 0;
+}
+
+/*
+ * Advance the left iterator (skipping trash entries) and compare its entry
+ * against invs_r->inv[next_r]. The -1/0/1 result tells the enclosing
+ * arm_smmu_invs_for_each_cmp() loop which side(s) to advance next.
+ */
+static inline int arm_smmu_invs_iter_next_cmp(struct arm_smmu_invs *invs_l,
+ size_t next_l, size_t *idx_l,
+ struct arm_smmu_invs *invs_r,
+ size_t next_r, size_t *idx_r)
+{
+ struct arm_smmu_inv *cur_l =
+ arm_smmu_invs_iter_next(invs_l, next_l, idx_l);
+
+ /*
+ * We have to update the idx_r manually, because the invs_r cannot call
+ * arm_smmu_invs_iter_next() as the invs_r never sets any users counter.
+ */
+ *idx_r = next_r;
+
+ /*
+ * Compare the current items of the two sorted arrays. If one side is
+ * past the end of its array, pick the other side so it can run out the
+ * iteration.
+ *
+ * If the left entry is empty, return 1 to pick the right entry.
+ * If the right entry is empty, return -1 to pick the left entry.
+ */
+ if (!cur_l)
+ return 1;
+ if (next_r >= invs_r->num_invs)
+ return -1;
+ return arm_smmu_inv_cmp(cur_l, &invs_r->inv[next_r]);
+}
+
+/**
+ * arm_smmu_invs_for_each_cmp - Iterate over two sorted arrays computing for
+ * arm_smmu_invs_merge() or arm_smmu_invs_unref()
+ * @invs_l: the base invalidation array
+ * @idx_l: a stack variable of 'size_t', to store the base array index
+ * @invs_r: the build_invs array as to_merge or to_unref
+ * @idx_r: a stack variable of 'size_t', to store the build_invs index
+ * @cmp: a stack variable of 'int', to store return value (-1, 0, or 1)
+ *
+ * Both arrays must be sorted in arm_smmu_inv_cmp() order. Each step advances
+ * the smaller side, or both sides when @cmp == 0 (matching entries).
+ */
+#define arm_smmu_invs_for_each_cmp(invs_l, idx_l, invs_r, idx_r, cmp) \
+ for (idx_l = idx_r = 0, \
+ cmp = arm_smmu_invs_iter_next_cmp(invs_l, 0, &(idx_l), \
+ invs_r, 0, &(idx_r)); \
+ idx_l < invs_l->num_invs || idx_r < invs_r->num_invs; \
+ cmp = arm_smmu_invs_iter_next_cmp( \
+ invs_l, idx_l + (cmp <= 0 ? 1 : 0), &(idx_l), \
+ invs_r, idx_r + (cmp >= 0 ? 1 : 0), &(idx_r)))
+
+/**
+ * arm_smmu_invs_merge() - Merge @to_merge into @invs and generate a new array
+ * @invs: the base invalidation array
+ * @to_merge: an array of invalidations to merge
+ *
+ * Return: a newly allocated array on success, or ERR_PTR
+ *
+ * This function must be locked and serialized with arm_smmu_invs_unref() and
+ * arm_smmu_invs_purge(), but do not lockdep on any lock for KUNIT test.
+ *
+ * Both @invs and @to_merge must be sorted, to ensure the returned array will be
+ * sorted as well.
+ *
+ * Caller is responsible for freeing the @invs and the returned new one.
+ *
+ * Entries marked as trash will be purged in the returned array.
+ */
+VISIBLE_IF_KUNIT
+struct arm_smmu_invs *arm_smmu_invs_merge(struct arm_smmu_invs *invs,
+ struct arm_smmu_invs *to_merge)
+{
+ struct arm_smmu_invs *new_invs;
+ struct arm_smmu_inv *new;
+ size_t num_invs = 0;
+ size_t i, j;
+ int cmp;
+
+ /* First pass: count the size of the resulting array */
+ arm_smmu_invs_for_each_cmp(invs, i, to_merge, j, cmp)
+ num_invs++;
+
+ new_invs = arm_smmu_invs_alloc(num_invs);
+ if (!new_invs)
+ return ERR_PTR(-ENOMEM);
+
+ /* Second pass: fill the new array, taking references as we go */
+ new = new_invs->inv;
+ arm_smmu_invs_for_each_cmp(invs, i, to_merge, j, cmp) {
+ if (cmp < 0) {
+ /* Only in @invs: copy as-is, keeping its users count */
+ *new = invs->inv[i];
+ } else if (cmp == 0) {
+ /* In both arrays: copy from @invs with one more user */
+ *new = invs->inv[i];
+ WRITE_ONCE(new->users, READ_ONCE(new->users) + 1);
+ } else {
+ /* Only in @to_merge: a fresh entry with one user */
+ *new = to_merge->inv[j];
+ WRITE_ONCE(new->users, 1);
+ }
+
+ /*
+ * Check that the new array is sorted. This also validates that
+ * to_merge is sorted.
+ */
+ if (new != new_invs->inv)
+ WARN_ON_ONCE(arm_smmu_inv_cmp(new - 1, new) == 1);
+ if (arm_smmu_inv_is_ats(new))
+ new_invs->has_ats = true;
+ new++;
+ }
+
+ WARN_ON(new != new_invs->inv + new_invs->num_invs);
+
+ return new_invs;
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_merge);
+
+/**
+ * arm_smmu_invs_unref() - Find in @invs for all entries in @to_unref, decrease
+ * the user counts without deletions
+ * @invs: the base invalidation array
+ * @to_unref: an array of invalidations to decrease their user counts
+ *
+ * This function will not fail. Any entry with users=0 will be marked as trash,
+ * and caller will be notified about the trashed entry via @to_unref by setting
+ * a users=0. The number of trash entries is accumulated in @invs->num_trashes
+ * for a later arm_smmu_invs_purge() call.
+ *
+ * All trailing trash entries in the array will be dropped. And the size of the
+ * array will be trimmed properly. All trash entries in-between will remain in
+ * the @invs until being completely deleted by the next arm_smmu_invs_merge()
+ * or an arm_smmu_invs_purge() function call.
+ *
+ * This function must be locked and serialized with arm_smmu_invs_merge() and
+ * arm_smmu_invs_purge(), but do not lockdep on any mutex for KUNIT test.
+ *
+ * Note that the final @invs->num_invs might not reflect the actual number of
+ * invalidations due to trash entries. Any reader should take the read lock to
+ * iterate each entry and check its users counter till the last entry.
+ */
+VISIBLE_IF_KUNIT
+void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
+ struct arm_smmu_invs *to_unref)
+{
+ unsigned long flags;
+ size_t num_invs = 0; /* one past the index of the last live entry */
+ size_t i, j;
+ int cmp;
+
+ arm_smmu_invs_for_each_cmp(invs, i, to_unref, j, cmp) {
+ if (cmp < 0) {
+ /* not found in to_unref, leave alone */
+ num_invs = i + 1;
+ } else if (cmp == 0) {
+ int users = READ_ONCE(invs->inv[i].users) - 1;
+
+ if (WARN_ON(users < 0))
+ continue;
+
+ /* same item */
+ WRITE_ONCE(invs->inv[i].users, users);
+ if (users) {
+ WRITE_ONCE(to_unref->inv[j].users, 1);
+ num_invs = i + 1;
+ continue;
+ }
+
+ /* Notify the caller about the trash entry */
+ WRITE_ONCE(to_unref->inv[j].users, 0);
+ invs->num_trashes++;
+ } else {
+ /* item in to_unref is not in invs or already a trash */
+ WARN_ON(true);
+ }
+ }
+
+ /* Exclude any trailing trash */
+ invs->num_trashes -= invs->num_invs - num_invs;
+
+ /* The lock is required to fence concurrent ATS operations. */
+ write_lock_irqsave(&invs->rwlock, flags);
+ WRITE_ONCE(invs->num_invs, num_invs); /* Remove trailing trash entries */
+ write_unlock_irqrestore(&invs->rwlock, flags);
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_unref);
+
+/**
+ * arm_smmu_invs_purge() - Purge all the trash entries in the @invs
+ * @invs: the base invalidation array
+ *
+ * Return: a newly allocated array on success removing all the trash entries, or
+ * NULL if there is no trash entry in the array or if allocation failed
+ *
+ * This function must be locked and serialized with arm_smmu_invs_merge() and
+ * arm_smmu_invs_unref(), but do not lockdep on any lock for KUNIT test.
+ *
+ * Caller is responsible for freeing the @invs and the returned new one.
+ */
+VISIBLE_IF_KUNIT
+struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs)
+{
+ struct arm_smmu_invs *new_invs;
+ struct arm_smmu_inv *inv;
+ size_t i, num_invs = 0;
+
+ if (WARN_ON(invs->num_invs < invs->num_trashes))
+ return NULL;
+ if (!invs->num_invs || !invs->num_trashes)
+ return NULL;
+
+ new_invs = arm_smmu_invs_alloc(invs->num_invs - invs->num_trashes);
+ if (!new_invs)
+ return NULL;
+
+ /* The iterator skips trash entries, copying only live ones */
+ arm_smmu_invs_for_each_entry(invs, i, inv) {
+ new_invs->inv[num_invs] = *inv;
+ if (arm_smmu_inv_is_ats(inv))
+ new_invs->has_ats = true;
+ num_invs++;
+ }
+
+ WARN_ON(num_invs != new_invs->num_invs);
+ return new_invs;
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_purge);
+
/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
struct arm_smmu_domain *arm_smmu_domain_alloc(void)
{
struct arm_smmu_domain *smmu_domain;
+ struct arm_smmu_invs *new_invs;
smmu_domain = kzalloc_obj(*smmu_domain);
if (!smmu_domain)
return ERR_PTR(-ENOMEM);
+ /* Every domain starts with an empty invalidation array */
+ new_invs = arm_smmu_invs_alloc(0);
+ if (!new_invs) {
+ kfree(smmu_domain);
+ return ERR_PTR(-ENOMEM);
+ }
+
INIT_LIST_HEAD(&smmu_domain->devices);
spin_lock_init(&smmu_domain->devices_lock);
+ rcu_assign_pointer(smmu_domain->invs, new_invs);
return smmu_domain;
}
int num;
};
+/*
+ * The order here also determines the sequence in which commands are sent to the
+ * command queue. E.g. TLBI must be done before ATC_INV.
+ */
+enum arm_smmu_inv_type {
+ INV_TYPE_S1_ASID,
+ INV_TYPE_S2_VMID,
+ INV_TYPE_S2_VMID_S1_CLEAR,
+ /* The two ATS types use the ->ssid union member, see arm_smmu_inv_is_ats() */
+ INV_TYPE_ATS,
+ INV_TYPE_ATS_FULL,
+};
+
+/* One invalidation entry; arrays of these are sorted by arm_smmu_inv_cmp() */
+struct arm_smmu_inv {
+ struct arm_smmu_device *smmu;
+ u8 type; /* enum arm_smmu_inv_type */
+ u8 size_opcode;
+ u8 nsize_opcode;
+ u32 id; /* ASID or VMID or SID */
+ union {
+ size_t pgsize; /* ARM_SMMU_FEAT_RANGE_INV */
+ u32 ssid; /* INV_TYPE_ATS */
+ };
+
+ int users; /* users=0 to mark as a trash to be purged */
+};
+
+/* True for the two ATS invalidation types, which carry ->ssid in the union */
+static inline bool arm_smmu_inv_is_ats(const struct arm_smmu_inv *inv)
+{
+ return inv->type == INV_TYPE_ATS || inv->type == INV_TYPE_ATS_FULL;
+}
+
+/**
+ * struct arm_smmu_invs - Per-domain invalidation array
+ * @max_invs: maximum capacity of the flexible array
+ * @num_invs: number of invalidations in the flexible array. May be smaller than
+ * @max_invs after a trailing trash entry is excluded, but must not be
+ * greater than @max_invs
+ * @num_trashes: number of trash entries in the array for arm_smmu_invs_purge().
+ * Must not be greater than @num_invs
+ * @rwlock: optional rwlock to fence ATS operations
+ * @has_ats: flag if the array contains an INV_TYPE_ATS or INV_TYPE_ATS_FULL
+ * @rcu: rcu head for kfree_rcu()
+ * @inv: flexible invalidation array
+ *
+ * The arm_smmu_invs is an RCU data structure. During a ->attach_dev callback,
+ * arm_smmu_invs_merge(), arm_smmu_invs_unref() and arm_smmu_invs_purge() will
+ * be used to allocate a new copy of an old array for addition and deletion in
+ * the old domain's and new domain's invs arrays.
+ *
+ * The arm_smmu_invs_unref() mutates a given array, by internally reducing the
+ * users counts of some given entries. This exists to support a no-fail routine
+ * like attaching to an IOMMU_DOMAIN_BLOCKED. And it could pair with a followup
+ * arm_smmu_invs_purge() call to generate a new clean array.
+ *
+ * Concurrent invalidation thread will push every invalidation described in the
+ * array into the command queue for each invalidation event. It is designed like
+ * this to optimize the invalidation fast path by avoiding locks.
+ *
+ * A domain can be shared across SMMU instances. When an instance gets removed,
+ * it would delete all the entries that belong to that SMMU instance. Then, a
+ * synchronize_rcu() would have to be called to sync the array, to prevent any
+ * concurrent invalidation thread accessing the old array from issuing commands
+ * to the command queue of a removed SMMU instance.
+ */
+struct arm_smmu_invs {
+ size_t max_invs;
+ size_t num_invs;
+ size_t num_trashes;
+ rwlock_t rwlock;
+ bool has_ats;
+ struct rcu_head rcu;
+ struct arm_smmu_inv inv[] __counted_by(max_invs);
+};
+
+/*
+ * Allocate a zero-initialized invalidation array sized for @num_invs entries.
+ * Both max_invs and num_invs are set to @num_invs. Returns NULL on allocation
+ * failure.
+ */
+static inline struct arm_smmu_invs *arm_smmu_invs_alloc(size_t num_invs)
+{
+ struct arm_smmu_invs *new_invs;
+
+ new_invs = kzalloc(struct_size(new_invs, inv, num_invs), GFP_KERNEL);
+ if (!new_invs)
+ return NULL;
+ new_invs->max_invs = num_invs;
+ new_invs->num_invs = num_invs;
+ rwlock_init(&new_invs->rwlock);
+ return new_invs;
+}
+
struct arm_smmu_evtq {
struct arm_smmu_queue q;
struct iopf_queue *iopf;
struct iommu_domain domain;
+ struct arm_smmu_invs __rcu *invs;
+
/* List of struct arm_smmu_master_domain */
struct list_head devices;
spinlock_t devices_lock;
void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
struct arm_smmu_master *master, struct mm_struct *mm,
u16 asid);
+
+struct arm_smmu_invs *arm_smmu_invs_merge(struct arm_smmu_invs *invs,
+ struct arm_smmu_invs *to_merge);
+void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
+ struct arm_smmu_invs *to_unref);
+struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs);
#endif
struct arm_smmu_master_domain {
static inline void arm_smmu_domain_free(struct arm_smmu_domain *smmu_domain)
{
+ /*
+ * No concurrency with invalidation is possible at this point, so the
+ * invs array installed by arm_smmu_domain_alloc() can be freed with a
+ * plain kfree() instead of kfree_rcu().
+ */
+ kfree(rcu_dereference_protected(smmu_domain->invs, true));
kfree(smmu_domain);
}