]> git.ipfire.org Git - thirdparty/libvirt.git/commitdiff
qemu: revert latest pSeries NVDIMM design changes
authorDaniel Henrique Barboza <danielhb413@gmail.com>
Tue, 15 Sep 2020 02:42:56 +0000 (23:42 -0300)
committerAndrea Bolognani <abologna@redhat.com>
Tue, 22 Sep 2020 10:25:34 +0000 (12:25 +0200)
In [1], changes were made to remove the existing auto-alignment
for pSeries NVDIMM devices. That design promotes strange situations
where the NVDIMM size reported in the domain XML is different
from what QEMU is actually using. We removed the auto-alignment
and relied on standard size validation.

However, this goes against Libvirt design philosophy of not
tampering with existing guest behavior, as pointed out by Daniel
in [2]. Since we can't know for sure whether there are guests that
are relying on the auto-alignment feature to work, the changes
made in [1] are a direct violation of this rule.

This patch reverts [1] entirely, re-enabling auto-alignment for
pSeries NVDIMM as it was before. Changes will be made to ease
the limitations of this design without hurting existing
guests.

This reverts the following commits:

- commit 2d93cbdea9d1b8dbf36bc0ffee6cb73d83d208c7
  Revert "formatdomain.html.in: mention pSeries NVDIMM 'align down' mechanic"

- commit 0ee56369c8b4f2f898b6aa1ff1f51ab033be1c02
  qemu_domain.c: change qemuDomainMemoryDeviceAlignSize() return type

- commit 07de813924caf37e535855541c0c1183d9d382e2
  qemu_domain.c: do not auto-align ppc64 NVDIMMs

- commit 0ccceaa57c50e5ee528f7073fa8723afd62b88b7
  qemu_validate.c: add pSeries NVDIMM size alignment validation

- commit 4fa2202d884414ad34d9952e72fb39b1d93c7e14
  qemu_domain.c: make qemuDomainGetMemorySizeAlignment() public

[1] https://www.redhat.com/archives/libvir-list/2020-July/msg02010.html
[2] https://www.redhat.com/archives/libvir-list/2020-September/msg00572.html

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
docs/formatdomain.rst
src/qemu/qemu_domain.c
src/qemu/qemu_domain.h
src/qemu/qemu_hotplug.c
src/qemu/qemu_validate.c
tests/qemuxml2argvdata/memory-hotplug-nvdimm-ppc64.ppc64-latest.args
tests/qemuxml2argvdata/memory-hotplug-nvdimm-ppc64.xml
tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-ppc64.xml

index 18e237c1579fd5153cd54863659da43251c96272..d5930a4ac17e313aea5ba0b0a727c55ff32819a1 100644 (file)
@@ -7216,8 +7216,10 @@ Example: usage of the memory devices
       following restrictions apply:
 
       #. the minimum label size is 128KiB,
-      #. the remaining size (total-size - label-size) will be aligned
-         to 4KiB as default.
+      #. the remaining size (total-size - label-size), also called guest area,
+         will be aligned to 4KiB as default. For pSeries guests, the guest area
+         will be aligned down to 256MiB, and the minimum size of the guest area
+         must be at least 256MiB.
 
    ``readonly``
       The ``readonly`` element is used to mark the vNVDIMM as read-only. Only
index a73a77f2e9ee77d15dde419a60dc768e31adccc9..9e0d3a15b2dffb294e921682dc328c42ac146913 100644 (file)
@@ -8037,7 +8037,7 @@ qemuDomainAgentAvailable(virDomainObjPtr vm,
 }
 
 
-unsigned long long
+static unsigned long long
 qemuDomainGetMemorySizeAlignment(const virDomainDef *def)
 {
     /* PPC requires the memory sizes to be rounded to 256MiB increments, so
@@ -8067,6 +8067,44 @@ qemuDomainGetMemoryModuleSizeAlignment(const virDomainDef *def,
 }
 
 
+static int
+qemuDomainNVDimmAlignSizePseries(virDomainDefPtr def,
+                                 virDomainMemoryDefPtr mem)
+{
+    /* For NVDIMMs in ppc64 in we want to align down the guest
+     * visible space, instead of align up, to avoid writing
+     * beyond the end of file by adding a potential 256MiB
+     * to the user specified size.
+     *
+     * The label-size is mandatory for ppc64 as well, meaning that
+     * the guest visible space will be target_size-label_size.
+     *
+     * Finally, target_size must include label_size.
+     *
+     * The above can be summed up as follows:
+     *
+     * target_size = AlignDown(target_size - label_size) + label_size
+     */
+    unsigned long long ppc64AlignSize = qemuDomainGetMemorySizeAlignment(def);
+    unsigned long long guestArea = mem->size - mem->labelsize;
+
+    /* Align down guest_area. 256MiB is the minimum size. Error
+     * out if target_size is smaller than 256MiB + label_size,
+     * since aligning it up will cause QEMU errors. */
+    if (mem->size < (ppc64AlignSize + mem->labelsize)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("minimum target size for the NVDIMM "
+                         "must be 256MB plus the label size"));
+        return -1;
+    }
+
+    guestArea = (guestArea/ppc64AlignSize) * ppc64AlignSize;
+    mem->size = guestArea + mem->labelsize;
+
+    return 0;
+}
+
+
 int
 qemuDomainAlignMemorySizes(virDomainDefPtr def)
 {
@@ -8113,8 +8151,11 @@ qemuDomainAlignMemorySizes(virDomainDefPtr def)
 
     /* Align memory module sizes */
     for (i = 0; i < def->nmems; i++) {
-        if (!(def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
-              ARCH_IS_PPC64(def->os.arch))) {
+        if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
+            ARCH_IS_PPC64(def->os.arch)) {
+            if (qemuDomainNVDimmAlignSizePseries(def, def->mems[i]) < 0)
+                return -1;
+        } else {
             align = qemuDomainGetMemoryModuleSizeAlignment(def, def->mems[i]);
             def->mems[i]->size = VIR_ROUND_UP(def->mems[i]->size, align);
         }
@@ -8143,15 +8184,19 @@ qemuDomainAlignMemorySizes(virDomainDefPtr def)
  * inplace. Default rounding is now to 1 MiB (qemu requires rounding to page,
  * size so this should be safe).
  */
-void
+int
 qemuDomainMemoryDeviceAlignSize(virDomainDefPtr def,
                                 virDomainMemoryDefPtr mem)
 {
-    if (!(mem->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
-          ARCH_IS_PPC64(def->os.arch))) {
+    if (mem->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
+        ARCH_IS_PPC64(def->os.arch)) {
+        return qemuDomainNVDimmAlignSizePseries(def, mem);
+    } else {
         mem->size = VIR_ROUND_UP(mem->size,
                                  qemuDomainGetMemorySizeAlignment(def));
     }
+
+    return 0;
 }
 
 
index 77d6bfc810313b958d010296c5d5bb16de5cbcd1..c7c3c5c073ea3bde4d6eb7ba01a6cdb7e7e51d6e 100644 (file)
@@ -751,11 +751,9 @@ bool qemuDomainDiskBlockJobIsActive(virDomainDiskDefPtr disk);
 bool qemuDomainHasBlockjob(virDomainObjPtr vm, bool copy_only)
     ATTRIBUTE_NONNULL(1);
 
-unsigned long long qemuDomainGetMemorySizeAlignment(const virDomainDef *def);
-
 int qemuDomainAlignMemorySizes(virDomainDefPtr def);
-void qemuDomainMemoryDeviceAlignSize(virDomainDefPtr def,
-                                     virDomainMemoryDefPtr mem);
+int qemuDomainMemoryDeviceAlignSize(virDomainDefPtr def,
+                                    virDomainMemoryDefPtr mem);
 
 virDomainChrDefPtr qemuFindAgentConfig(virDomainDefPtr def);
 
index f20b8e9a56ddee18ec8fb1ee87db9d28f4accf9a..fc0866c01187c3a9ae9b062cc3bdd77f00e684ea 100644 (file)
@@ -2368,7 +2368,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver,
     int id;
     int ret = -1;
 
-    qemuDomainMemoryDeviceAlignSize(vm->def, mem);
+    if (qemuDomainMemoryDeviceAlignSize(vm->def, mem) < 0)
+        goto cleanup;
 
     if (qemuDomainDefValidateMemoryHotplug(vm->def, priv->qemuCaps, mem) < 0)
         goto cleanup;
@@ -5612,7 +5613,8 @@ qemuDomainDetachPrepMemory(virDomainObjPtr vm,
     virDomainMemoryDefPtr mem;
     int idx;
 
-    qemuDomainMemoryDeviceAlignSize(vm->def, match);
+    if (qemuDomainMemoryDeviceAlignSize(vm->def, match) < 0)
+        return -1;
 
     if ((idx = virDomainMemoryFindByDef(vm->def, match)) < 0) {
         virReportError(VIR_ERR_DEVICE_MISSING,
index e2508459914700f109dbabd2d6db2645aea8478a..3ed4039cdf70db5d42e59d86b54faad73a303c14 100644 (file)
@@ -4035,45 +4035,15 @@ qemuValidateDomainDeviceDefHub(virDomainHubDefPtr hub,
 }
 
 
-static unsigned long long
-qemuValidateGetNVDIMMAlignedSizePseries(virDomainMemoryDefPtr mem,
-                                        const virDomainDef *def)
-{
-    unsigned long long ppc64AlignSize = qemuDomainGetMemorySizeAlignment(def);
-    unsigned long long guestArea = mem->size - mem->labelsize;
-
-    /* NVDIMM is already aligned */
-    if (guestArea % ppc64AlignSize == 0)
-        return mem->size;
-
-    /* Suggested aligned size is rounded up */
-    guestArea = (guestArea/ppc64AlignSize + 1) * ppc64AlignSize;
-    return guestArea + mem->labelsize;
-}
-
 static int
 qemuValidateDomainDeviceDefMemory(virDomainMemoryDefPtr mem,
-                                  const virDomainDef *def,
                                   virQEMUCapsPtr qemuCaps)
 {
-    if (mem->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) {
-        if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) {
-            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                           _("nvdimm isn't supported by this QEMU binary"));
-            return -1;
-        }
-
-        if (qemuDomainIsPSeries(def)) {
-            unsigned long long alignedNVDIMMSize =
-                qemuValidateGetNVDIMMAlignedSizePseries(mem, def);
-
-            if (mem->size != alignedNVDIMMSize) {
-                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
-                               _("nvdimm size is not aligned. Suggested aligned "
-                                 "size: %llu KiB"), alignedNVDIMMSize);
-                return -1;
-            }
-        }
+    if (mem->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
+        !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("nvdimm isn't supported by this QEMU binary"));
+        return -1;
     }
 
     return 0;
@@ -4191,7 +4161,7 @@ qemuValidateDomainDeviceDef(const virDomainDeviceDef *dev,
         break;
 
     case VIR_DOMAIN_DEVICE_MEMORY:
-        ret = qemuValidateDomainDeviceDefMemory(dev->data.memory, def, qemuCaps);
+        ret = qemuValidateDomainDeviceDefMemory(dev->data.memory, qemuCaps);
         break;
 
     case VIR_DOMAIN_DEVICE_LEASE:
index 58e3f9e1613020cd2abc063aebcb6f8e89ccb6c7..eff80dcf80f0d3f30260f0a45bf097f43dec2620 100644 (file)
@@ -19,7 +19,7 @@ file=/tmp/lib/domain--1-QEMUGuest1/master-key.aes \
 -smp 2,sockets=2,dies=1,cores=1,threads=1 \
 -numa node,nodeid=0,cpus=0-1,mem=1024 \
 -object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\
-size=805437440 \
+size=537001984 \
 -device nvdimm,node=0,label-size=131072,\
 uuid=49545eb3-75e1-2d0a-acdd-f0294406c99e,memdev=memnvdimm0,id=nvdimm0,slot=0 \
 -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \
index 10c146e8cfce4d8db799000ccb89f806fbe5ef9b..ae5a17d3c8d731e394158e61174b778cc52b23a2 100644 (file)
@@ -38,7 +38,7 @@
         <path>/tmp/nvdimm</path>
       </source>
       <target>
-        <size unit='KiB'>786560</size>
+        <size unit='KiB'>550000</size>
         <node>0</node>
         <label>
           <size unit='KiB'>128</size>
index 10c146e8cfce4d8db799000ccb89f806fbe5ef9b..ae5a17d3c8d731e394158e61174b778cc52b23a2 100644 (file)
@@ -38,7 +38,7 @@
         <path>/tmp/nvdimm</path>
       </source>
       <target>
-        <size unit='KiB'>786560</size>
+        <size unit='KiB'>550000</size>
         <node>0</node>
         <label>
           <size unit='KiB'>128</size>