]> git.ipfire.org Git - thirdparty/libvirt.git/commitdiff
schemas: Introduce disk type NVMe
authorMichal Privoznik <mprivozn@redhat.com>
Mon, 3 Jun 2019 08:46:18 +0000 (10:46 +0200)
committerMichal Privoznik <mprivozn@redhat.com>
Tue, 17 Dec 2019 09:04:43 +0000 (10:04 +0100)
There is this class of PCI devices that act like disks: NVMe.
Therefore, they are both PCI devices and disks. While we already
have <hostdev/> (and can assign an NVMe device to a domain
successfully) we don't have a disk representation. There are three
problems with PCI assignment in case of an NVMe device:

1) domains with <hostdev/> can't be migrated

2) NVMe device is assigned whole, there's no way to assign only a
   namespace

3) Because hypervisors see <hostdev/> they don't put block layer
   on top of it - users don't get all the fancy features like
   snapshots

NVMe namespaces are a way of splitting one contiguous NVDIMM memory
into smaller ones, effectively creating smaller NVMe-s (which can
then be partitioned, LVMed, etc.)

Because of all of this the following XML was chosen to model an
NVMe device:

  <disk type='nvme' device='disk'>
    <driver name='qemu' type='raw'/>
    <source type='pci' managed='yes' namespace='1'>
      <address domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
    </source>
    <target dev='vda' bus='virtio'/>
  </disk>

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Cole Robinson <crobinso@redhat.com>
docs/formatdomain.html.in
docs/schemas/domaincommon.rng
tests/qemuxml2argvdata/disk-nvme.xml [new file with mode: 0644]

index bfcdc026e66743d230e80fc9a49a09c23e882be6..e06cf2061be797c5ee541b2cb05e8ac34bd8f8ec 100644 (file)
     &lt;/backingStore&gt;
     &lt;target dev='vdd' bus='virtio'/&gt;
   &lt;/disk&gt;
+  &lt;disk type='nvme' device='disk'&gt;
+    &lt;driver name='qemu' type='raw'/&gt;
+    &lt;source type='pci' managed='yes' namespace='1'&gt;
+      &lt;address domain='0x0000' bus='0x01' slot='0x00' function='0x0'/&gt;
+    &lt;/source&gt;
+    &lt;target dev='vde' bus='virtio'/&gt;
+  &lt;/disk&gt;
 &lt;/devices&gt;
 ...</pre>
 
             Valid values are "file", "block",
             "dir" (<span class="since">since 0.7.5</span>),
             "network" (<span class="since">since 0.8.7</span>), or
-            "volume" (<span class="since">since 1.0.5</span>)
+            "volume" (<span class="since">since 1.0.5</span>), or
+            "nvme" (<span class="since">since 6.0.0</span>)
             and refer to the underlying source for the disk.
             <span class="since">Since 0.0.3</span>
             </dd>
               <span class="since">Since 1.0.5</span>
               </p>
               </dd>
+            <dt><code>nvme</code></dt>
+              <dd>
+              To specify the disk source for an NVMe disk, the <code>source</code>
+              element has the following attributes:
+              <dl>
+                <dt><code>type</code></dt>
+                <dd>The type of address specified in <code>address</code>
+                sub-element. Currently, only <code>pci</code> value is
+                accepted.
+                </dd>
+
+                <dt><code>managed</code></dt>
+                <dd>This attribute instructs libvirt to detach the NVMe
+                controller automatically on domain startup (<code>yes</code>)
+                or expect the controller to be detached by the system
+                administrator (<code>no</code>).
+                </dd>
+
+                <dt><code>namespace</code></dt>
+                <dd>The namespace ID which should be assigned to the domain.
+                According to the NVMe standard, namespace numbers start
+                from 1 (inclusive).
+                </dd>
+              </dl>
+
+              The difference between <code>&lt;disk type='nvme'&gt;</code>
+              and <code>&lt;hostdev/&gt;</code> is that the latter is plain
+              host device assignment with all its limitations (e.g. no live
+              migration), while the former makes the hypervisor run the NVMe
+              disk through the hypervisor's block layer, thus enabling all
+              features provided by the layer (e.g. snapshots, domain
+              migration, etc.). Moreover, since the NVMe disk is unbound
+              from its PCI driver, the host kernel storage stack is not
+              involved (compared to passing say <code>/dev/nvme0n1</code> via
+              <code>&lt;disk type='block'&gt;</code>) and therefore lower
+              latencies can be achieved.
+              </dd>
           </dl>
         With "file", "block", and "volume", one or more optional
         sub-elements <code>seclabel</code>, <a href="#seclabel">described
             initiator IQN needed to access the source via mandatory
             attribute <code>name</code>.
           </dd>
+          <dt><code>address</code></dt>
+          <dd>For a disk of type <code>nvme</code> this element
+            specifies the PCI address of the host NVMe
+            controller.
+            <span class="since">Since 6.0.0</span>
+          </dd>
         </dl>
 
         <p>
index f5b51d20ad4de2e93049e27e41c0c54e3ace6f56..e964773f5e0da704f9183c94ebb66c96abdda799 100644 (file)
       <ref name="diskSourceDir"/>
       <ref name="diskSourceNetwork"/>
       <ref name="diskSourceVolume"/>
+      <ref name="diskSourceNvme"/>
     </choice>
   </define>
 
     </optional>
   </define>
 
+  <define name="diskSourceNvme">
+    <attribute name="type">
+      <value>nvme</value>
+    </attribute>
+    <optional>
+      <element name="source">
+        <attribute name="type">
+          <value>pci</value>
+        </attribute>
+        <attribute name="namespace">
+          <ref name="uint32"/>
+        </attribute>
+        <optional>
+          <attribute name="managed">
+            <ref name="virYesNo"/>
+          </attribute>
+        </optional>
+        <element name="address">
+          <ref name="pciaddress"/>
+        </element>
+        <ref name="diskSourceCommon"/>
+        <optional>
+          <ref name="storageStartupPolicy"/>
+        </optional>
+        <optional>
+          <ref name="encryption"/>
+        </optional>
+      </element>
+    </optional>
+  </define>
+
   <define name="diskTarget">
     <data type="string">
       <param name="pattern">(ioemu:)?(fd|hd|sd|vd|xvd|ubd)[a-zA-Z0-9_]+</param>
diff --git a/tests/qemuxml2argvdata/disk-nvme.xml b/tests/qemuxml2argvdata/disk-nvme.xml
new file mode 100644 (file)
index 0000000..4f4f8b9
--- /dev/null
@@ -0,0 +1,63 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+  <memory unit='KiB'>219136</memory>
+  <currentMemory unit='KiB'>219136</currentMemory>
+  <vcpu placement='static'>1</vcpu>
+  <os>
+    <type arch='i686' machine='pc'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-i386</emulator>
+    <disk type='nvme' device='disk'>
+      <driver name='qemu' type='raw'/>
+      <source type='pci' managed='yes' namespace='1'>
+        <address domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
+      </source>
+      <target dev='vda' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
+    </disk>
+    <disk type='nvme' device='disk'>
+      <driver name='qemu' type='raw'/>
+      <source type='pci' managed='yes' namespace='2'>
+        <address domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
+      </source>
+      <target dev='vdb' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
+    </disk>
+    <disk type='nvme' device='disk'>
+      <driver name='qemu' type='raw'/>
+      <source type='pci' managed='no' namespace='1'>
+        <address domain='0x0000' bus='0x02' slot='0x00' function='0x0'/>
+      </source>
+      <target dev='vdc' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/>
+    </disk>
+    <disk type='nvme' device='disk'>
+      <driver name='qemu' type='qcow2' cache='none'/>
+      <source type='pci' managed='no' namespace='2'>
+        <address domain='0x0001' bus='0x02' slot='0x00' function='0x0'/>
+        <encryption format='luks'>
+          <secret type='passphrase' uuid='0a81f5b2-8403-7b23-c8d6-21ccc2f80d6f'/>
+        </encryption>
+      </source>
+      <target dev='vdd' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/>
+    </disk>
+    <controller type='usb' index='0'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
+    </controller>
+    <controller type='pci' index='0' model='pci-root'/>
+    <controller type='scsi' index='0' model='virtio-scsi'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+    </controller>
+    <input type='mouse' bus='ps2'/>
+    <input type='keyboard' bus='ps2'/>
+    <memballoon model='none'/>
+  </devices>
+</domain>