* entries exist.
*/
#define flush_tlb_fix_spurious_fault(vma, address, ptep) \
- local_flush_tlb_page_nonotify(vma, address)
+ __flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY)
-#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
- local_flush_tlb_page_nonotify(vma, address)
+#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
+ __flush_tlb_range(vma, address, address + PMD_SIZE, PMD_SIZE, 2, \
+ TLBF_NOBROADCAST | TLBF_NONOTIFY | TLBF_NOWALKCACHE)
- /*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
- extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
- #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
-
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
* supported.
*/
struct net_iov {
- union {
- struct netmem_desc desc;
-
- /* XXX: The following part should be removed once all
- * the references to them are converted so as to be
- * accessed via netmem_desc e.g. niov->desc.pp instead
- * of niov->pp.
- */
- struct {
- unsigned long _flags;
- unsigned long pp_magic;
- struct page_pool *pp;
- unsigned long _pp_mapping_pad;
- unsigned long dma_addr;
- atomic_long_t pp_ref_count;
- };
- };
-
+ struct netmem_desc desc;
- struct net_iov_area *owner;
+ unsigned int page_type;
enum net_iov_type type;
+ struct net_iov_area *owner;
};
+ /* Make sure 'the offset of page_type in struct page == the offset of
+ * type in struct net_iov'.
+ */
+ #define NET_IOV_ASSERT_OFFSET(pg, iov) \
+ static_assert(offsetof(struct page, pg) == \
+ offsetof(struct net_iov, iov))
+ NET_IOV_ASSERT_OFFSET(page_type, page_type);
+ #undef NET_IOV_ASSERT_OFFSET
+
struct net_iov_area {
/* Array of net_iovs for this area. */
struct net_iov *niovs;
--- /dev/null
- zap_vma_pages(vma);
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Andrei Vagin <avagin@openvz.org>
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+
+#include <linux/cleanup.h>
+#include <linux/mm.h>
+#include <linux/time_namespace.h>
+#include <linux/time.h>
+#include <linux/vdso_datastore.h>
+
+#include <vdso/clocksource.h>
+#include <vdso/datapage.h>
+
+#include "namespace_internal.h"
+
+static struct timens_offset offset_from_ts(struct timespec64 off)
+{
+ struct timens_offset ret;
+
+ ret.sec = off.tv_sec;
+ ret.nsec = off.tv_nsec;
+
+ return ret;
+}
+
+/*
+ * A time namespace VVAR page has the same layout as the VVAR page which
+ * contains the system wide VDSO data.
+ *
+ * For a normal task the VVAR pages are installed in the normal ordering:
+ * VVAR
+ * PVCLOCK
+ * HVCLOCK
+ * TIMENS <- Not really required
+ *
+ * Now for a timens task the pages are installed in the following order:
+ * TIMENS
+ * PVCLOCK
+ * HVCLOCK
+ * VVAR
+ *
+ * The check for vdso_clock->clock_mode is in the unlikely path of
+ * the seq begin magic. So for the non-timens case most of the time
+ * 'seq' is even, so the branch is not taken.
+ *
+ * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
+ * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
+ * update to finish and for 'seq' to become even anyway.
+ *
+ * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
+ * enforces the time namespace handling path.
+ */
+static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
+ struct time_namespace *ns)
+{
+ struct timens_offset *offset = vc->offset;
+ struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
+ struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
+
+ vc->seq = 1;
+ vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
+ offset[CLOCK_MONOTONIC] = monotonic;
+ offset[CLOCK_MONOTONIC_RAW] = monotonic;
+ offset[CLOCK_MONOTONIC_COARSE] = monotonic;
+ offset[CLOCK_BOOTTIME] = boottime;
+ offset[CLOCK_BOOTTIME_ALARM] = boottime;
+}
+
+struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops().
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+
+ WARN(1, "vvar_page accessed remotely");
+
+ return NULL;
+}
+
+static void timens_set_vvar_page(struct task_struct *task,
+ struct time_namespace *ns)
+{
+ struct vdso_time_data *vdata;
+ struct vdso_clock *vc;
+ unsigned int i;
+
+ if (ns == &init_time_ns)
+ return;
+
+ /* Fast-path, taken by every task in namespace except the first. */
+ if (likely(ns->frozen_offsets))
+ return;
+
+ guard(mutex)(&timens_offset_lock);
+ /* Nothing to-do: vvar_page has been already initialized. */
+ if (ns->frozen_offsets)
+ return;
+
+ ns->frozen_offsets = true;
+ vdata = page_address(ns->vvar_page);
+ vc = vdata->clock_data;
+
+ for (i = 0; i < CS_BASES; i++)
+ timens_setup_vdso_clock_data(&vc[i], ns);
+
+ if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
+ for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
+ timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
+ }
+}
+
+/*
+ * The vvar page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will be re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_clock_data() for details.
+ */
+static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+
+ guard(mmap_read_lock)(mm);
+ for_each_vma(vmi, vma) {
+ if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
++ zap_vma(vma);
+ }
+ return 0;
+}
+
+void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
+{
+ timens_set_vvar_page(tsk, ns);
+ vdso_join_timens(tsk, ns);
+}
+
+int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
+{
+ ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!ns->vvar_page)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void timens_vdso_free_vvar_page(struct time_namespace *ns)
+{
+ __free_page(ns->vvar_page);
+}