]> git.ipfire.org Git - thirdparty/libvirt.git/commitdiff
qemu: add memfd source type
authorMarc-André Lureau <marcandre.lureau@redhat.com>
Thu, 15 Nov 2018 11:55:53 +0000 (15:55 +0400)
committerMichal Privoznik <mprivozn@redhat.com>
Fri, 16 Nov 2018 07:57:12 +0000 (08:57 +0100)
Add a new memoryBacking source type "memfd", supported by QEMU (when
the capability is available).

A memfd is a specialized anonymous memory kind. As such, an anonymous
source type could be automatically using a memfd. However, there are
some complications when migrating from different memory backends in
qemu (mainly due to the internal object naming at this point, but
there could be more). For now, it is simpler and safer to simply
introduce a new source type "memfd". Eventually, the "anonymous" type
could learn to use memfd transparently in a separate change.

The main benefits are that it doesn't need to create filesystem files,
and it also enforces sealing, providing a bit more safety.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
docs/formatdomain.html.in
docs/schemas/domaincommon.rng
src/conf/domain_conf.c
src/conf/domain_conf.h
src/qemu/qemu_command.c
src/qemu/qemu_domain.c
tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args [new file with mode: 0644]
tests/qemuxml2argvdata/memfd-memory-numa.xml [new file with mode: 0644]
tests/qemuxml2argvtest.c

index 8a23b785dd96ac17b5b6c125f8538c5933717a63..8d5edbcca54d5626a728224f8a54396a782e90fc 100644 (file)
     &lt;/hugepages&gt;
     &lt;nosharepages/&gt;
     &lt;locked/&gt;
-    &lt;source type="file|anonymous"/&gt;
+    &lt;source type="file|anonymous|memfd"/&gt;
     &lt;access mode="shared|private"/&gt;
     &lt;allocation mode="immediate|ondemand"/&gt;
     &lt;discard/&gt;
         suitable for the specific environment at the same time to mitigate
         the risks described above. <span class="since">Since 1.0.6</span></dd>
        <dt><code>source</code></dt>
-       <dd>Using the <code>type</code> attribute, it's possible to provide
-         "file" to utilize file memorybacking or keep the default
-         "anonymous".</dd>
+       <dd>Using the <code>type</code> attribute, it's possible to
+       provide "file" to utilize file memorybacking or keep the
+       default "anonymous". <span class="since">Since 4.10.0</span>,
+       you may choose "memfd" backing. (QEMU/KVM only)</dd>
        <dt><code>access</code></dt>
        <dd>Using the <code>mode</code> attribute, specify if the memory is
          to be "shared" or "private". This can be overridden per numa node by
index fa934dfba04d06cf97d39f20f2e9be695aabdee9..5ee727eefa3d2d5d69c6e685673c0a8211dd4a33 100644 (file)
                   <choice>
                     <value>file</value>
                     <value>anonymous</value>
+                    <value>memfd</value>
                   </choice>
                 </attribute>
               </element>
index 793bbe1fbdc19d74ac08df12d6990e3e29b6360c..c3dbba691966978bd87997bafb2a347d7b84b96b 100644 (file)
@@ -898,7 +898,8 @@ VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST,
 VIR_ENUM_IMPL(virDomainMemorySource, VIR_DOMAIN_MEMORY_SOURCE_LAST,
               "none",
               "file",
-              "anonymous")
+              "anonymous",
+              "memfd")
 
 VIR_ENUM_IMPL(virDomainMemoryAllocation, VIR_DOMAIN_MEMORY_ALLOCATION_LAST,
               "none",
index c167f8c43ce1119cfb8c09899eff9b52c20319ab..467785cd83033f25be3d3d2f1ca4705d3cd10856 100644 (file)
@@ -607,6 +607,7 @@ typedef enum {
     VIR_DOMAIN_MEMORY_SOURCE_NONE = 0,  /* No memory source defined */
     VIR_DOMAIN_MEMORY_SOURCE_FILE,      /* Memory source is set as file */
     VIR_DOMAIN_MEMORY_SOURCE_ANONYMOUS, /* Memory source is set as anonymous */
+    VIR_DOMAIN_MEMORY_SOURCE_MEMFD,     /* Memory source is set as memfd */
 
     VIR_DOMAIN_MEMORY_SOURCE_LAST,
 } virDomainMemorySource;
index 9aea5af26f21c5a356865a5be7a47e4fe9b478c1..23a6661c10d22e09fc626beb4cdaf556a36253a8 100644 (file)
@@ -3176,6 +3176,26 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
 }
 
 
+/* Add the boolean "share" property to @props when @memAccess names an
+ * explicit access mode: true for shared, false for private.  Any other
+ * value leaves @props untouched.
+ *
+ * Returns 0 when nothing had to be added, otherwise whatever
+ * virJSONValueObjectAdd() returned. */
+static int
+qemuBuildMemoryBackendPropsShare(virJSONValuePtr props,
+                                 virDomainMemoryAccess memAccess)
+{
+    const bool shared = memAccess == VIR_DOMAIN_MEMORY_ACCESS_SHARED;
+
+    /* Neither shared nor private was requested: nothing to emit. */
+    if (!shared && memAccess != VIR_DOMAIN_MEMORY_ACCESS_PRIVATE)
+        return 0;
+
+    return virJSONValueObjectAdd(props, "b:share", shared, NULL);
+}
+
+
 /**
  * qemuBuildMemoryBackendProps:
  * @backendProps: [out] constructed object
@@ -3195,7 +3215,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
  * configuration value of 1 is returned. This behaviour can be suppressed by
  * setting @force to true in which case 0 would be returned.
  *
- * Then, if one of the two memory-backend-* should be used, the @qemuCaps is
+ * Then, if one of the three memory-backend-* should be used, the @qemuCaps is
  * consulted to check if qemu does support it.
  *
  * Returns: 0 on success,
@@ -3321,7 +3341,19 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
     if (!(props = virJSONValueNewObject()))
         return -1;
 
-    if (useHugepage || mem->nvdimmPath || memAccess ||
+    if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_MEMFD) {
+        backendType = "memory-backend-memfd";
+
+        if (useHugepage &&
+            (virJSONValueObjectAdd(props, "b:hugetlb", useHugepage, NULL) < 0 ||
+             virJSONValueObjectAdd(props, "U:hugetlbsize", pagesize << 10, NULL) < 0)) {
+            goto cleanup;
+        }
+
+        if (qemuBuildMemoryBackendPropsShare(props, memAccess) < 0)
+            goto cleanup;
+
+    } else if (useHugepage || mem->nvdimmPath || memAccess ||
         def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) {
 
         if (mem->nvdimmPath) {
@@ -3359,21 +3391,8 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
                 goto cleanup;
         }
 
-        switch (memAccess) {
-        case VIR_DOMAIN_MEMORY_ACCESS_SHARED:
-            if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0)
-                goto cleanup;
-            break;
-
-        case VIR_DOMAIN_MEMORY_ACCESS_PRIVATE:
-            if (virJSONValueObjectAdd(props, "b:share", false, NULL) < 0)
-                goto cleanup;
-            break;
-
-        case VIR_DOMAIN_MEMORY_ACCESS_DEFAULT:
-        case VIR_DOMAIN_MEMORY_ACCESS_LAST:
-            break;
-        }
+        if (qemuBuildMemoryBackendPropsShare(props, memAccess) < 0)
+            goto cleanup;
     } else {
         backendType = "memory-backend-ram";
     }
@@ -3403,7 +3422,9 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
     if (!needHugepage && !mem->sourceNodes && !nodeSpecified &&
         !mem->nvdimmPath &&
         memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT &&
-        def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) {
+        def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE &&
+        def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_MEMFD &&
+        !force) {
         /* report back that using the new backend is not necessary
          * to achieve the desired configuration */
         ret = 1;
@@ -3421,6 +3442,12 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
                            _("this qemu doesn't support the "
                              "memory-backend-ram object"));
             goto cleanup;
+        } else if (STREQ(backendType, "memory-backend-memfd") &&
+                   !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("this qemu doesn't support the "
+                             "memory-backend-memfd object"));
+            goto cleanup;
         }
 
         ret = 0;
@@ -7654,7 +7681,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
 
     if (virDomainNumatuneHasPerNodeBinding(def->numa) &&
         !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
-          virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE))) {
+          virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) ||
+          virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD))) {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                        _("Per-node memory binding is not supported "
                          "with this QEMU"));
@@ -7680,7 +7708,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
      * need to check which approach to use */
     for (i = 0; i < ncells; i++) {
         if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
-            virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
+            virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) ||
+            virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) {
 
             if ((rc = qemuBuildMemoryCellBackendStr(def, cfg, i, priv,
                                                     &nodeBackends[i])) < 0)
index fbe63e2e1dc1896501083d309c88793ca850274c..1beb5969c191bcb7db62a625c005223624e91c86 100644 (file)
@@ -3952,7 +3952,8 @@ qemuDomainDefValidateFeatures(const virDomainDef *def,
 
 
 static int
-qemuDomainDefValidateMemory(const virDomainDef *def)
+qemuDomainDefValidateMemory(const virDomainDef *def,
+                            virQEMUCapsPtr qemuCaps)
 {
     const long system_page_size = virGetSystemPageSizeKB();
     const virDomainMemtune *mem = &def->mem;
@@ -3974,6 +3975,13 @@ qemuDomainDefValidateMemory(const virDomainDef *def)
         return -1;
     }
 
+    if (mem->source == VIR_DOMAIN_MEMORY_SOURCE_MEMFD &&
+        !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD_HUGETLB)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("hugepages is not supported with memfd memory source"));
+        return -1;
+    }
+
     /* We can't guarantee any other mem.access
      * if no guest NUMA nodes are defined. */
     if (mem->hugepages[0].size != system_page_size &&
@@ -4139,7 +4147,7 @@ qemuDomainDefValidate(const virDomainDef *def,
     if (qemuDomainDefValidateFeatures(def, qemuCaps) < 0)
         goto cleanup;
 
-    if (qemuDomainDefValidateMemory(def) < 0)
+    if (qemuDomainDefValidateMemory(def, qemuCaps) < 0)
         goto cleanup;
 
     ret = 0;
diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args b/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args
new file mode 100644 (file)
index 0000000..d0f4057
--- /dev/null
@@ -0,0 +1,34 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name guest=instance-00000092,debug-threads=on \
+-S \
+-object secret,id=masterKey0,format=raw,\
+file=/tmp/lib/domain--1-instance-00000092/master-key.aes \
+-machine pc-i440fx-wily,accel=kvm,usb=off,dump-guest-core=off \
+-m 14336 \
+-mem-prealloc \
+-realtime mlock=off \
+-smp 8,sockets=1,cores=8,threads=1 \
+-object memory-backend-memfd,id=ram-node0,hugetlb=yes,hugetlbsize=2097152,\
+share=yes,size=15032385536,host-nodes=3,policy=preferred \
+-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
+-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
+-display none \
+-no-user-config \
+-nodefaults \
+-chardev socket,id=charmonitor,fd=1729,server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=control \
+-rtc base=utc \
+-no-shutdown \
+-no-acpi \
+-boot strict=on \
+-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x2 \
+-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
+resourcecontrol=deny \
+-msg timestamp=on
diff --git a/tests/qemuxml2argvdata/memfd-memory-numa.xml b/tests/qemuxml2argvdata/memfd-memory-numa.xml
new file mode 100644 (file)
index 0000000..8416a99
--- /dev/null
@@ -0,0 +1,36 @@
+  <domain type='kvm' id='56'>
+    <name>instance-00000092</name>
+    <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
+    <memory unit='KiB'>14680064</memory>
+    <currentMemory unit='KiB'>14680064</currentMemory>
+    <memoryBacking>
+      <hugepages>
+          <page size="2" unit="M"/>
+      </hugepages>
+      <source type='memfd'/>
+      <access mode='shared'/>
+      <allocation mode='immediate'/>
+    </memoryBacking>
+    <numatune>
+        <memnode cellid='0' mode='preferred' nodeset='3'/>
+    </numatune>
+    <vcpu placement='static'>8</vcpu>
+    <os>
+      <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
+      <boot dev='hd'/>
+    </os>
+    <cpu>
+      <topology sockets='1' cores='8' threads='1'/>
+      <numa>
+        <cell id='0' cpus='0-7' memory='14680064' unit='KiB'/>
+      </numa>
+    </cpu>
+    <clock offset='utc'/>
+    <on_poweroff>destroy</on_poweroff>
+    <on_reboot>restart</on_reboot>
+    <on_crash>destroy</on_crash>
+    <devices>
+      <emulator>/usr/bin/qemu-system-x86_64</emulator>
+      <memballoon model='virtio'/>
+    </devices>
+  </domain>
index bf164f5f8d132bf5f7d4d4109b015db81fe0029b..95429b3ae741547e95bf5261edaa011e4986795c 100644 (file)
@@ -2974,6 +2974,8 @@ mymain(void)
     DO_TEST("fd-memory-no-numa-topology", QEMU_CAPS_OBJECT_MEMORY_FILE,
             QEMU_CAPS_KVM);
 
+    DO_TEST_CAPS_LATEST("memfd-memory-numa");
+
     DO_TEST("cpu-check-none", QEMU_CAPS_KVM);
     DO_TEST("cpu-check-partial", QEMU_CAPS_KVM);
     DO_TEST("cpu-check-full", QEMU_CAPS_KVM);