]> git.ipfire.org Git - thirdparty/open-vm-tools.git/commitdiff
VMCI Queue Pair Rewrite
authorVMware, Inc <>
Mon, 26 Sep 2011 18:38:02 +0000 (11:38 -0700)
committerMarcelo Vanzin <mvanzin@vmware.com>
Mon, 26 Sep 2011 18:38:02 +0000 (11:38 -0700)
The VMCI queue pair implementation is being simplified to only
support VM to host communication. This new approach will
use the VM memory directly for the queue pair content, making
any host side queue pair operations work directly on the VMs
main memory, instead of operating on special shared memory
that has been remapped into the VM.

Signed-off-by: Marcelo Vanzin <mvanzin@vmware.com>
open-vm-tools/modules/linux/shared/vmci_iocontrols.h
open-vm-tools/modules/linux/shared/vmci_kernel_if.h
open-vm-tools/modules/linux/vmci/common/vmciContext.c
open-vm-tools/modules/linux/vmci/common/vmciContext.h
open-vm-tools/modules/linux/vmci/common/vmciQPair.c
open-vm-tools/modules/linux/vmci/common/vmciQueuePair.c
open-vm-tools/modules/linux/vmci/common/vmciQueuePair.h
open-vm-tools/modules/linux/vmci/linux/driver.c
open-vm-tools/modules/linux/vmci/linux/vmciKernelIf.c
open-vm-tools/modules/linux/vmci/linux/vmci_version.h
open-vm-tools/modules/linux/vmci/shared/vmciQueue.h

index b75b64fc7c6fe69dbc9189fdb7f03b19163bdc3b..b9cd5d644b476da839f4e2f4d56774c734198d23 100644 (file)
@@ -115,6 +115,12 @@ VMCIPtrToVA64(void const *ptr) // IN
  * versions are ways of detecting previous versions of the connecting
  * application (i.e., VMX).
  *
+ * VMCI_VERSION_NOVMVM: This version removed support for VM to VM
+ * communication.
+ *
+ * VMCI_VERSION_NOTIFY: This version introduced doorbell notification
+ * support.
+ *
  * VMCI_VERSION_HOSTQP: This version introduced host end point support
  * for hosted products.
  *
@@ -127,7 +133,8 @@ VMCIPtrToVA64(void const *ptr) // IN
  * driver.
  */
 
-#define VMCI_VERSION                VMCI_VERSION_NOTIFY
+#define VMCI_VERSION                VMCI_VERSION_NOVMVM
+#define VMCI_VERSION_NOVMVM         VMCI_MAKE_VERSION(11, 0)
 #define VMCI_VERSION_NOTIFY         VMCI_MAKE_VERSION(10, 0)
 #define VMCI_VERSION_HOSTQP         VMCI_MAKE_VERSION(9, 0)
 #define VMCI_VERSION_PREHOSTQP      VMCI_MAKE_VERSION(8, 0)
@@ -206,19 +213,20 @@ enum IOCTLCmd_VMCI {
    IOCTLCMD(RESERVED2),
 
    /*
-    * The following two used to be for shared memory.  They are now unused and
-    * and are reserved for future use.  They will fail if issued.
+    * The following used to be for shared memory. It is now unused and is
+    * reserved for future use. It will fail if issued.
     */
    IOCTLCMD(RESERVED3),
-   IOCTLCMD(RESERVED4),
 
    /*
-    * The follwoing two were also used to be for shared memory. An old
-    * WS6 user-mode client might try to use them with the new driver,
-    * but since we ensure that only contexts created by VMX'en of the
-    * appropriate version (VMCI_VERSION_NOTIFY) or higher use this
-    * ioctl, everything is fine.
+    * The following three also used to be for shared memory. An
+    * old WS6 user-mode client might try to use them with the new
+    * driver, but since we ensure that only contexts created by VMX'en
+    * of the appropriate version (VMCI_VERSION_NOTIFY or
+    * VMCI_VERSION_NEWQP) or higher use these ioctls, everything is
+    * fine.
     */
+   IOCTLCMD(QUEUEPAIR_SETVA),
    IOCTLCMD(NOTIFY_RESOURCE),
    IOCTLCMD(NOTIFICATIONS_RECEIVE),
    IOCTLCMD(VERSION2),
@@ -383,6 +391,8 @@ enum IOCTLCmd_VMCIWin32 {
                VMCIIOCTL_BUFFERED(VERSION2)
 #define IOCTL_VMCI_QUEUEPAIR_ALLOC  \
                VMCIIOCTL_BUFFERED(QUEUEPAIR_ALLOC)
+#define IOCTL_VMCI_QUEUEPAIR_SETVA  \
+               VMCIIOCTL_BUFFERED(QUEUEPAIR_SETVA)
 #define IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE  \
                VMCIIOCTL_BUFFERED(QUEUEPAIR_SETPAGEFILE)
 #define IOCTL_VMCI_QUEUEPAIR_DETACH  \
@@ -469,7 +479,7 @@ typedef struct VMCISharedMemInfo {
    char       pageFileName[VMCI_PATH_MAX];
 } VMCISharedMemInfo;
 
-typedef struct VMCIQueuePairAllocInfo {
+typedef struct VMCIQueuePairAllocInfo_VMToVM {
    VMCIHandle handle;
    VMCIId     peer;
    uint32     flags;
@@ -486,8 +496,32 @@ typedef struct VMCIQueuePairAllocInfo {
 #endif
    int32      result;
    uint32     _pad;
+} VMCIQueuePairAllocInfo_VMToVM;
+
+typedef struct VMCIQueuePairAllocInfo {
+   VMCIHandle handle;
+   VMCIId     peer;
+   uint32     flags;
+   uint64     produceSize;
+   uint64     consumeSize;
+#if !defined(VMX86_SERVER) && !defined(VMKERNEL)
+   VA64       ppnVA; /* Start VA of queue pair PPNs. */
+#else
+   PPN *      PPNs;
+#endif
+   uint64     numPPNs;
+   int32      result;
+   uint32     version;
 } VMCIQueuePairAllocInfo;
 
+typedef struct VMCIQueuePairSetVAInfo {
+   VMCIHandle handle;
+   VA64       va; /* Start VA of queue pair PPNs. */
+   uint64     numPPNs;
+   uint32     version;
+   int32      result;
+} VMCIQueuePairSetVAInfo;
+
 /*
  * For backwards compatibility, here is a version of the
  * VMCIQueuePairPageFileInfo before host support end-points was added.
@@ -693,6 +727,7 @@ enum VMCrossTalkSockOpt {
    VMCI_SO_NOTIFICATIONS_RECEIVE    = IOCTL_VMCI_NOTIFICATIONS_RECEIVE,
    VMCI_SO_VERSION2                 = IOCTL_VMCI_VERSION2,
    VMCI_SO_QUEUEPAIR_ALLOC          = IOCTL_VMCI_QUEUEPAIR_ALLOC,
+   VMCI_SO_QUEUEPAIR_SETVA          = IOCTL_VMCI_QUEUEPAIR_SETVA,
    VMCI_SO_QUEUEPAIR_SETPAGEFILE    = IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE,
    VMCI_SO_QUEUEPAIR_DETACH         = IOCTL_VMCI_QUEUEPAIR_DETACH,
    VMCI_SO_DATAGRAM_SEND            = IOCTL_VMCI_DATAGRAM_SEND,
index f884971bcd19d7d83bb01bd855098741e76d60b6..c81f0c18f957b0e5d0ed72b86c44677957255c4c 100644 (file)
@@ -57,6 +57,7 @@
 #  include "splock.h"
 #  include "semaphore_ext.h"
 #  include "vmkapi.h"
+#  include "world.h"
 #endif
 
 #ifdef SOLARIS
   typedef Semaphore VMCIMutex;
   typedef World_ID VMCIHostVmID;
   typedef uint32 VMCIHostUser;
+  typedef PPN *VMCIQPGuestMem;
 #elif defined(linux)
   typedef spinlock_t VMCILock;
   typedef unsigned long VMCILockFlags;
   typedef struct semaphore VMCIMutex;
   typedef PPN *VMCIPpnList; /* List of PPNs in produce/consume queue. */
   typedef uid_t VMCIHostUser;
+  typedef VA64 VMCIQPGuestMem;
 #elif defined(__APPLE__)
   typedef IOLock *VMCILock;
   typedef unsigned long VMCILockFlags;
   typedef IOLock *VMCIMutex;
   typedef void *VMCIPpnList; /* Actually a pointer to the C++ Object IOMemoryDescriptor */
   typedef uid_t VMCIHostUser;
+  typedef VA64 *VMCIQPGuestMem;
 #elif defined(_WIN32)
   typedef KSPIN_LOCK VMCILock;
   typedef KIRQL VMCILockFlags;
   typedef FAST_MUTEX VMCIMutex;
   typedef PMDL VMCIPpnList; /* MDL to map the produce/consume queue. */
   typedef PSID VMCIHostUser;
+  typedef VA64 *VMCIQPGuestMem;
 #elif defined(SOLARIS)
   typedef kmutex_t VMCILock;
   typedef unsigned long VMCILockFlags;
   typedef kmutex_t VMCIMutex;
   typedef PPN *VMCIPpnList; /* List of PPNs in produce/consume queue. */
   typedef uid_t VMCIHostUser;
+  typedef VA64 VMCIQPGuestMem;
 #endif // VMKERNEL
 
 /* Callback needed for correctly waiting on events. */
@@ -325,49 +331,71 @@ int VMCI_PopulatePPNList(uint8 *callBuf, const PPNSet *ppnSet);
 
 struct VMCIQueue;
 
-#if !defined(VMKERNEL)
 struct PageStoreAttachInfo;
 struct VMCIQueue *VMCIHost_AllocQueue(uint64 queueSize);
 void VMCIHost_FreeQueue(struct VMCIQueue *queue, uint64 queueSize);
 
-int VMCIHost_GetUserMemory(struct PageStoreAttachInfo *attach,
-                           struct VMCIQueue *produceQ,
-                           struct VMCIQueue *detachQ);
-void VMCIHost_ReleaseUserMemory(struct PageStoreAttachInfo *attach,
-                                struct VMCIQueue *produceQ,
-                                struct VMCIQueue *detachQ);
-#endif // VMKERNEL
-
-#ifdef _WIN32
-/*
- * Special routine used on the Windows platform to save a queue when
- * its backing memory goes away.
- */
-
-void VMCIHost_SaveProduceQ(struct PageStoreAttachInfo *attach,
-                           struct VMCIQueue *produceQ,
-                           struct VMCIQueue *detachQ,
-                           const uint64 produceQSize);
-#endif // _WIN32
+#if defined(VMKERNEL)
+typedef World_Handle *VMCIGuestMemID;
+#define INVALID_VMCI_GUEST_MEM_ID  NULL
+#else
+typedef uint32 VMCIGuestMemID;
+#define INVALID_VMCI_GUEST_MEM_ID  0
+#endif
 
-#ifdef _WIN32
-void VMCI_InitQueueMutex(struct VMCIQueue *produceQ,
-                             struct VMCIQueue *consumeQ);
-void VMCI_AcquireQueueMutex(struct VMCIQueue *queue);
-void VMCI_ReleaseQueueMutex(struct VMCIQueue *queue);
-Bool VMCI_EnqueueToDevNull(struct VMCIQueue *queue);
-int VMCI_ConvertToLocalQueue(struct VMCIQueue *queueInfo,
-                             struct VMCIQueue *otherQueueInfo,
-                             uint64 size, Bool keepContent,
-                             void **oldQueue);
-void VMCI_RevertToNonLocalQueue(struct VMCIQueue *queueInfo,
-                                void *nonLocalQueue, uint64 size);
-void VMCI_FreeQueueBuffer(void *queue, uint64 size);
-Bool VMCI_CanCreate(void);
-#else // _WIN32
+#if defined(VMKERNEL) || defined(__linux__)  || defined(_WIN32) || \
+    defined(__APPLE__)
+  struct QueuePairPageStore;
+  int VMCIHost_RegisterUserMemory(struct QueuePairPageStore *pageStore,
+                                  struct VMCIQueue *produceQ,
+                                  struct VMCIQueue *consumeQ);
+  void VMCIHost_UnregisterUserMemory(struct VMCIQueue *produceQ,
+                                     struct VMCIQueue *consumeQ);
+  int VMCIHost_MapQueueHeaders(struct VMCIQueue *produceQ,
+                               struct VMCIQueue *consumeQ);
+  int VMCIHost_UnmapQueueHeaders(VMCIGuestMemID gid,
+                                 struct VMCIQueue *produceQ,
+                                 struct VMCIQueue *consumeQ);
+  void VMCI_InitQueueMutex(struct VMCIQueue *produceQ,
+                           struct VMCIQueue *consumeQ);
+  void VMCI_CleanupQueueMutex(struct VMCIQueue *produceQ,
+                              struct VMCIQueue *consumeQ);
+  void VMCI_AcquireQueueMutex(struct VMCIQueue *queue);
+  void VMCI_ReleaseQueueMutex(struct VMCIQueue *queue);
+#else // Below are the guest OS'es without host side support.
 #  define VMCI_InitQueueMutex(_pq, _cq)
+#  define VMCI_CleanupQueueMutex(_pq, _cq)
 #  define VMCI_AcquireQueueMutex(_q)
 #  define VMCI_ReleaseQueueMutex(_q)
+#  define VMCIHost_RegisterUserMemory(_ps, _pq, _cq) VMCI_ERROR_UNAVAILABLE
+#  define VMCIHost_UnregisterUserMemory(_pq, _cq)
+#  define VMCIHost_MapQueueHeaders(_pq, _cq) VMCI_SUCCESS
+#  define VMCIHost_UnmapQueueHeaders(_gid, _pq, _cq) VMCI_SUCCESS
+#endif
+
+#if (!defined(VMKERNEL) && defined(__linux__)) || defined(_WIN32) ||  \
+   defined(__APPLE__) || defined(SOLARIS)
+  int VMCIHost_GetUserMemory(VA64 produceUVA, VA64 consumeUVA,
+                             struct VMCIQueue *produceQ,
+                             struct VMCIQueue *consumeQ);
+  void VMCIHost_ReleaseUserMemory(struct VMCIQueue *produceQ,
+                                  struct VMCIQueue *consumeQ);
+#else
+#  define VMCIHost_GetUserMemory(_puva, _cuva, _pq, _cq) VMCI_ERROR_UNAVAILABLE
+#  define VMCIHost_ReleaseUserMemory(_pq, _cq) ASSERT_NOT_IMPLEMENTED(FALSE)
+#endif
+
+#if defined(_WIN32)
+    Bool VMCI_EnqueueToDevNull(struct VMCIQueue *queue);
+    int VMCI_ConvertToLocalQueue(struct VMCIQueue *queueInfo,
+                                 struct VMCIQueue *otherQueueInfo,
+                                 uint64 size, Bool keepContent,
+                                 void **oldQueue);
+    void VMCI_RevertToNonLocalQueue(struct VMCIQueue *queueInfo,
+                                    void *nonLocalQueue, uint64 size);
+    void VMCI_FreeQueueBuffer(void *queue, uint64 size);
+    Bool VMCI_CanCreate(void);
+#else // _WIN32
 #  define VMCI_EnqueueToDevNull(_q) FALSE
 #  define VMCI_ConvertToLocalQueue(_pq, _cq, _s, _oq, _kc) VMCI_ERROR_UNAVAILABLE
 #  define VMCI_RevertToNonLocalQueue(_q, _nlq, _s)
index 89d4186ff4443c7b5fa34049fc4dea18d1e89d06..5a7a7d62cd9cd18acff818b9a58e79e29bf7294e 100644 (file)
@@ -506,7 +506,6 @@ VMCIContextFreeContext(VMCIContext *context)  // IN
       tempHandle = VMCIHandleArray_RemoveTail(context->wellKnownArray);
    }
 
-#ifndef VMKERNEL
    /*
     * Cleanup all queue pair resources attached to context.  If the VM dies
     * without cleaning up, this code will make sure that no resources are
@@ -516,7 +515,7 @@ VMCIContextFreeContext(VMCIContext *context)  // IN
    tempHandle = VMCIHandleArray_GetEntry(context->queuePairArray, 0);
    while (!VMCI_HANDLE_EQUAL(tempHandle, VMCI_INVALID_HANDLE)) {
       VMCIQPBroker_Lock();
-      if (VMCIQPBroker_Detach(tempHandle, context, TRUE) < VMCI_SUCCESS) {
+      if (VMCIQPBroker_Detach(tempHandle, context) < VMCI_SUCCESS) {
          /*
           * When VMCIQPBroker_Detach() succeeds it removes the handle from the
           * array.  If detach fails, we must remove the handle ourselves.
@@ -526,16 +525,6 @@ VMCIContextFreeContext(VMCIContext *context)  // IN
       VMCIQPBroker_Unlock();
       tempHandle = VMCIHandleArray_GetEntry(context->queuePairArray, 0);
    }
-#else
-   /*
-    * On ESX, all entries in the queuePairArray have been cleaned up
-    * either by the regular VMCI device destroy path or by the world
-    * cleanup destroy path. We assert that no resources are leaked.
-    */
-
-   ASSERT(VMCI_HANDLE_EQUAL(VMCIHandleArray_GetEntry(context->queuePairArray, 0),
-                            VMCI_INVALID_HANDLE));
-#endif /* !VMKERNEL */
 
    /*
     * It is fine to destroy this without locking the callQueue, as
@@ -1050,6 +1039,7 @@ VMCIContext_FindAndUpdateSrcFSR(VMCIId migrateCid,      // IN
  *
  *----------------------------------------------------------------------
  */
+
 Bool
 VMCIContext_IsActiveHnd(VMCIContext *context, // IN
                         uintptr_t eventHnd)   // IN
@@ -1065,6 +1055,36 @@ VMCIContext_IsActiveHnd(VMCIContext *context, // IN
 }
 
 
+/*
+ *----------------------------------------------------------------------
+ *
+ * VMCIContext_GetActiveHnd --
+ *
+ *      Returns the current event handle.
+ *
+ * Results:
+ *      The current active handle.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+uintptr_t
+VMCIContext_GetActiveHnd(VMCIContext *context) // IN
+{
+   VMCILockFlags flags;
+   uintptr_t activeHnd;
+
+   ASSERT(context);
+   VMCI_GrabLock(&context->lock, &flags);
+   activeHnd = VMCIHost_GetActiveHnd(&context->hostContext);
+   VMCI_ReleaseLock(&context->lock, flags);
+   return activeHnd;
+}
+
+
 /*
  *----------------------------------------------------------------------
  *
@@ -2393,7 +2413,7 @@ VMCI_IsContextOwner(VMCIId contextID,   // IN
  *      by this caller.
  *
  * Results:
- *      VMCI_SUCCESS on success, error code otherwise.
+ *      TRUE if context supports host queue pairs, FALSE otherwise.
  *
  * Side effects:
  *      None.
@@ -2413,3 +2433,48 @@ VMCIContext_SupportsHostQP(VMCIContext *context)    // IN: Context structure
    return TRUE;
 #endif
 }
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * VMCIContext_ReleaseGuestMem --
+ *
+ *      Releases all the context's references to guest memory.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+VMCIContext_ReleaseGuestMem(VMCIContext *context, // IN: Context structure
+                            VMCIGuestMemID gid)   // IN: reference to guest
+{
+#ifdef VMKERNEL
+   uint32 numQueuePairs;
+   uint32 cur;
+
+   numQueuePairs = VMCIHandleArray_GetSize(context->queuePairArray);
+   for (cur = 0; cur < numQueuePairs; cur++) {
+      VMCIHandle handle;
+      handle = VMCIHandleArray_GetEntry(context->queuePairArray, cur);
+      if (!VMCI_HANDLE_EQUAL(handle, VMCI_INVALID_HANDLE)) {
+         int res;
+
+         VMCIQPBroker_Lock();
+         res = VMCIQPBroker_Unmap(handle, context, gid);
+         if (res < VMCI_SUCCESS) {
+            VMCI_WARNING(("Failed to unmap guest memory for queue pair "
+                          "(handle=0x%x:0x%x, res=%d).\n",
+                          handle.context, handle.resource, res));
+         }
+         VMCIQPBroker_Unlock();
+      }
+   }
+#endif
+}
index 1258e055b43ca310753d5ea6ff3156647b8f31b6..60bcdf610b3f300f95e9db3e3a73e8bdeb585bce 100644 (file)
@@ -35,6 +35,7 @@
 #include "vmci_call_defs.h"
 #include "vmci_handle_array.h"
 #include "vmci_infrastructure.h"
+#include "vmci_kernel_if.h"
 
 #define MAX_QUEUED_GUESTCALLS_PER_VM  100
 
@@ -57,10 +58,9 @@ void VMCIContext_SetFSRState(VMCIContext *context,
 VMCIContext *VMCIContext_FindAndUpdateSrcFSR(VMCIId migrateCid,
                                              uintptr_t eventHnd,
                                              uintptr_t *srcEventHnd);
-Bool VMCIContext_IsActiveHnd(VMCIContext *context,
-                             uintptr_t eventHnd);
-void VMCIContext_SetInactiveHnd(VMCIContext *context,
-                                uintptr_t eventHnd);
+Bool VMCIContext_IsActiveHnd(VMCIContext *context, uintptr_t eventHnd);
+uintptr_t VMCIContext_GetActiveHnd(VMCIContext *context);
+void VMCIContext_SetInactiveHnd(VMCIContext *context, uintptr_t eventHnd);
 Bool VMCIContext_RemoveHnd(VMCIContext *context,
                            uintptr_t eventHnd,
                            uint32 *numOld,
@@ -91,6 +91,8 @@ int VMCIContext_GetCheckpointState(VMCIId contextID, uint32 cptType,
                                    uint32 *numCIDs, char **cptBufPtr);
 int VMCIContext_SetCheckpointState(VMCIId contextID, uint32 cptType,
                                    uint32 numCIDs, char *cptBuf);
+void VMCIContext_ReleaseGuestMem(VMCIContext *context, VMCIGuestMemID gid);
+
 #ifndef VMX86_SERVER
 void VMCIContext_CheckAndSignalNotify(VMCIContext *context);
 #  ifdef __linux__
index 6be0b6a1e8620980c25ca032098295681a47d8f3..4c7ebedc752e8e0103cb1b4752938a3335651d2d 100644 (file)
@@ -87,8 +87,43 @@ struct VMCIQPair {
    Bool guestEndpoint;
 };
 
-#define VMCI_QPAIR_NO_QUEUE(_qp) (!(_qp)->produceQ->qHeader || \
-                                  !(_qp)->consumeQ->qHeader)
+static int VMCIQPairMapQueueHeaders(VMCIQueue *produceQ, VMCIQueue *consumeQ);
+
+#define VMCI_QPAIR_NO_QUEUE(_qp) (VMCIQPairMapQueueHeaders(_qp->produceQ, \
+                                  _qp->consumeQ) != VMCI_SUCCESS)
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIQPairMapQueueHeaders --
+ *
+ *      The queue headers may not be mapped at all times. If a queue is
+ *      currently not mapped, it will be attempted to do so.
+ *
+ * Results:
+ *      VMCI_SUCCESS if queues were validated, appropriate error code otherwise.
+ *
+ * Side effects:
+ *      Windows blocking call.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static int
+VMCIQPairMapQueueHeaders(VMCIQueue *produceQ, // IN
+                         VMCIQueue *consumeQ) // IN
+{
+   int result;
+
+   if (NULL == produceQ->qHeader || NULL == consumeQ->qHeader) {
+      result = VMCIHost_MapQueueHeaders(produceQ, consumeQ);
+      if (result < VMCI_SUCCESS) {
+         return result;
+      }
+   }
+
+   return VMCI_SUCCESS;
+}
 
 
 /*
@@ -632,7 +667,7 @@ VMCIQPair_ConsumeBufReady(const VMCIQPair *qpair) // IN
 
 static INLINE ssize_t
 EnqueueLocked(VMCIQueue *produceQ,                   // IN
-              const VMCIQueue *consumeQ,             // IN
+              VMCIQueue *consumeQ,                   // IN
               const uint64 produceQSize,             // IN
               const void *buf,                       // IN
               size_t bufSize,                        // IN
@@ -649,7 +684,7 @@ EnqueueLocked(VMCIQueue *produceQ,                   // IN
       return (ssize_t) bufSize;
    }
 
-   if (UNLIKELY(!produceQ->qHeader || !consumeQ->qHeader)) {
+   if (UNLIKELY(VMCIQPairMapQueueHeaders(produceQ, consumeQ) != VMCI_SUCCESS)) {
       return VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
    }
 #endif
@@ -714,7 +749,7 @@ EnqueueLocked(VMCIQueue *produceQ,                   // IN
 
 static INLINE ssize_t
 DequeueLocked(VMCIQueue *produceQ,                        // IN
-              const VMCIQueue *consumeQ,                  // IN
+              VMCIQueue *consumeQ,                        // IN
               const uint64 consumeQSize,                  // IN
               void *buf,                                  // IN
               size_t bufSize,                             // IN
@@ -728,8 +763,7 @@ DequeueLocked(VMCIQueue *produceQ,                        // IN
    ssize_t result;
 
 #if !defined VMX86_VMX
-   if (UNLIKELY(!produceQ->qHeader ||
-                !consumeQ->qHeader)) {
+   if (UNLIKELY(VMCIQPairMapQueueHeaders(produceQ, consumeQ) != VMCI_SUCCESS)) {
       return VMCI_ERROR_QUEUEPAIR_NODATA;
    }
 #endif
@@ -809,6 +843,8 @@ VMCIQPair_Enqueue(VMCIQPair *qpair,        // IN
                           qpair->consumeQ,
                           qpair->produceQSize,
                           buf, bufSize, bufType,
+                          qpair->flags & VMCI_QPFLAG_LOCAL?
+                          VMCIMemcpyToQueueLocal:
                           VMCIMemcpyToQueue);
 
    VMCIQPairUnlock(qpair);
@@ -853,6 +889,8 @@ VMCIQPair_Dequeue(VMCIQPair *qpair,        // IN
                           qpair->consumeQ,
                           qpair->consumeQSize,
                           buf, bufSize, bufType,
+                          qpair->flags & VMCI_QPFLAG_LOCAL?
+                          VMCIMemcpyFromQueueLocal:
                           VMCIMemcpyFromQueue,
                           TRUE);
 
@@ -899,6 +937,8 @@ VMCIQPair_Peek(VMCIQPair *qpair,    // IN
                           qpair->consumeQ,
                           qpair->consumeQSize,
                           buf, bufSize, bufType,
+                          qpair->flags & VMCI_QPFLAG_LOCAL?
+                          VMCIMemcpyFromQueueLocal:
                           VMCIMemcpyFromQueue,
                           FALSE);
 
index a79de1eb422afb993aa6df15557e8989db363c9f..cb988fab726cae1c4540c24a51dd85557628435a 100644 (file)
 #if defined(VMKERNEL)
 #  include "vmciVmkInt.h"
 #  include "vm_libc.h"
-#  include "helper_ext.h"
 #endif
 
 #define LGPFX "VMCIQueuePair: "
 
+
+/*
+ * In the following, we will distinguish between two kinds of VMX processes -
+ * the ones with versions lower than VMCI_VERSION_NOVMVM (that use specialized
+ * VMCI page files in the VMX and support VM to VM communication) and the
+ * newer ones that use the guest memory directly. We will in the following refer
+ * to the older VMX versions as old-style VMX'en, and the newer ones as new-style
+ * VMX'en.
+ *
+ * The state transition diagram is as follows (the VMCIQPB_ prefix has been
+ * removed for readability) - see below for more details on the transitions:
+ *
+ *            --------------  NEW  -------------
+ *            |                                |
+ *           \_/                              \_/
+ *     CREATED_NO_MEM <-----------------> CREATED_MEM
+ *            |    |                           |
+ *            |    o-----------------------o   |
+ *            |                            |   |
+ *           \_/                          \_/ \_/
+ *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
+ *            |                            |   |
+ *            |     o----------------------o   |
+ *            |     |                          |
+ *           \_/   \_/                        \_/
+ *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
+ *            |                                |
+ *            |                                |
+ *            -------------> gone <-------------
+ *
+ * In more detail. When a VMCI queue pair is first created, it will be in the
+ * VMCIQPB_NEW state. It will then move into one of the following states:
+ * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
+ *     - the create was performed by a host endpoint, in which case there is no
+ *       backing memory yet.
+ *     - the create was initiated by an old-style VMX, that uses
+ *       VMCIQPBroker_SetPageStore to specify the UVAs of the queue pair at a
+ *       later point in time. This state can be distinguished from the one above
+ *       by the context ID of the creator. A host side is not allowed to attach
+ *       until the page store has been set.
+ * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair is created
+ *     by a VMX using the queue pair device backend that sets the UVAs of the
+ *     queue pair immediately and stores the information for later attachers. At
+ *     this point, it is ready for the host side to attach to it.
+ * Once the queue pair is in one of the created states (with the exception of the
+ * case mentioned for older VMX'en above), it is possible to attach to the queue
+ * pair. Again we have two new states possible:
+ * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following paths:
+ *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue pair,
+ *       and attaches to a queue pair previously created by the host side.
+ *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
+ *       already created by a guest.
+ *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
+ *       VMCIQPBroker_SetPageStore (see below).
+ * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
+ *     VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
+ *     bring the queue pair into this state. Once VMCIQPBroker_SetPageStore is
+ *     called to register the user memory, the VMCIQPB_ATTACH_MEM state will be
+ *     entered.
+ * From the attached queue pair, the queue pair can enter the shutdown states
+ * when either side of the queue pair detaches. If the guest side detaches first,
+ * the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where the content
+ * of the queue pair will no longer be available. If the host side detaches first,
+ * the queue pair will enter the VMCIQPB_SHUTDOWN_MEM, since the guest memory
+ * will still be around.
+ *
+ * New-style VMX'en will also unmap guest memory, if the guest is quiesced, e.g.,
+ * during a snapshot operation. In that case, the guest memory will no longer be
+ * available, and the queue pair will transition from *_MEM state to a *_NO_MEM
+ * state. The VMX may later map the memory once more, in which case the queue
+ * pair will transition from the *_NO_MEM state at that point back to the *_MEM
+ * state. Note that the *_NO_MEM state may have changed, since the peer may have
+ * either attached or detached in the meantime. The values are laid out such that
+ * ++ on a state will move from a *_NO_MEM to a *_MEM state, and vice versa.
+ */
+
+typedef enum {
+   VMCIQPB_NEW,
+   VMCIQPB_CREATED_NO_MEM,
+   VMCIQPB_CREATED_MEM,
+   VMCIQPB_ATTACHED_NO_MEM,
+   VMCIQPB_ATTACHED_MEM,
+   VMCIQPB_SHUTDOWN_NO_MEM,
+   VMCIQPB_SHUTDOWN_MEM,
+   VMCIQPB_GONE
+} QPBrokerState;
+
+
 /*
  * The context that creates the QueuePair becomes producer of produce queue,
  * and consumer of consume queue. The context on other end for the QueuePair
@@ -65,25 +152,13 @@ typedef struct QPBrokerEntry {
    QueuePairEntry       qp;
    VMCIId               createId;
    VMCIId               attachId;
-   Bool                 pageStoreSet;
-   Bool                 allowAttach;
+   QPBrokerState        state;
    Bool                 requireTrustedAttach;
    Bool                 createdByTrusted;
-#ifdef VMKERNEL
-   QueuePairPageStore   store;
-#elif defined(__linux__) || defined(_WIN32) || defined(__APPLE__) || \
-      defined(SOLARIS)
-   /*
-    * Always created but only used if a host endpoint attaches to this
-    * queue.
-    */
-
+   Bool                 vmciPageFiles;  // Created by VMX using VMCI page files
    VMCIQueue           *produceQ;
    VMCIQueue           *consumeQ;
-   char                 producePageFile[VMCI_PATH_MAX];
-   char                 consumePageFile[VMCI_PATH_MAX];
-   PageStoreAttachInfo *attachInfo;
-#endif
+   void                *localMem; // Kernel memory for local queue pair
 } QPBrokerEntry;
 
 #if !defined(VMKERNEL)
@@ -105,6 +180,9 @@ typedef struct QueuePairList {
 
 static QueuePairList qpBrokerList;
 
+#define QPE_NUM_PAGES(_QPE) ((uint32)(CEILING(_QPE.produceSize, PAGE_SIZE) + \
+                                      CEILING(_QPE.consumeSize, PAGE_SIZE) + 2))
+
 #if !defined(VMKERNEL)
   static QueuePairList qpGuestEndpoints;
   static VMCIHandleArray *hibernateFailedList;
@@ -121,15 +199,33 @@ static QueuePairEntry *QueuePairList_GetHead(QueuePairList *qpList);
 
 static int QueuePairNotifyPeer(Bool attach, VMCIHandle handle, VMCIId myId,
                                VMCIId peerId);
+
 static int VMCIQPBrokerAllocInt(VMCIHandle handle, VMCIId peer,
                                 uint32 flags, VMCIPrivilegeFlags privFlags,
                                 uint64 produceSize,
                                 uint64 consumeSize,
                                 QueuePairPageStore *pageStore,
                                 VMCIContext *context,
-                                QPBrokerEntry **ent);
-
-#if !defined(VMKERNEL)
+                                QPBrokerEntry **ent,
+                                Bool *swap);
+static int VMCIQPBrokerAttach(QPBrokerEntry *entry,
+                              VMCIId peer,
+                              uint32 flags,
+                              VMCIPrivilegeFlags privFlags,
+                              uint64 produceSize,
+                              uint64 consumeSize,
+                              QueuePairPageStore *pageStore,
+                              VMCIContext *context,
+                              QPBrokerEntry **ent);
+static int VMCIQPBrokerCreate(VMCIHandle handle,
+                              VMCIId peer,
+                              uint32 flags,
+                              VMCIPrivilegeFlags privFlags,
+                              uint64 produceSize,
+                              uint64 consumeSize,
+                              QueuePairPageStore *pageStore,
+                              VMCIContext *context,
+                              QPBrokerEntry **ent);
 static int VMCIQueuePairAllocHostWork(VMCIHandle *handle, VMCIQueue **produceQ,
                                       uint64 produceSize, VMCIQueue **consumeQ,
                                       uint64 consumeSize,
@@ -137,6 +233,8 @@ static int VMCIQueuePairAllocHostWork(VMCIHandle *handle, VMCIQueue **produceQ,
                                       VMCIPrivilegeFlags privFlags);
 static int VMCIQueuePairDetachHostWork(VMCIHandle handle);
 
+#if !defined(VMKERNEL)
+
 static int QueuePairNotifyPeerLocal(Bool attach, VMCIHandle handle);
 
 static QPGuestEndpoint *QPGuestEndpointCreate(VMCIHandle handle,
@@ -158,6 +256,8 @@ static void VMCIQPUnmarkHibernateFailed(QPGuestEndpoint *entry);
 
 extern int VMCI_SendDatagram(VMCIDatagram *);
 
+#endif
+
 
 /*
  *-----------------------------------------------------------------------------
@@ -194,8 +294,12 @@ VMCIQueuePair_Alloc(VMCIHandle *handle,           // IN/OUT
    }
 
    if (guestEndpoint) {
+#if !defined(VMKERNEL)
       return VMCIQueuePairAllocGuestWork(handle, produceQ, produceSize, consumeQ,
                                          consumeSize, peer, flags, privFlags);
+#else
+      return VMCI_ERROR_INVALID_ARGS;
+#endif
    } else {
       return VMCIQueuePairAllocHostWork(handle, produceQ, produceSize, consumeQ,
                                         consumeSize, peer, flags, privFlags);
@@ -229,12 +333,15 @@ VMCIQueuePair_Detach(VMCIHandle handle,   // IN
    }
 
    if (guestEndpoint) {
+#if !defined(VMKERNEL)
       return VMCIQueuePairDetachGuestWork(handle);
+#else
+      return VMCI_ERROR_INVALID_ARGS;
+#endif
    } else {
       return VMCIQueuePairDetachHostWork(handle);
    }
 }
-#endif // !defined(VMKERNEL)
 
 
 /*
@@ -592,7 +699,185 @@ VMCIQPBroker_Alloc(VMCIHandle handle,             // IN
 {
    return VMCIQPBrokerAllocInt(handle, peer, flags, privFlags,
                                produceSize, consumeSize,
-                               pageStore, context, NULL);
+                               pageStore, context, NULL, NULL);
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * QueuePairNotifyPeer --
+ *
+ *      Enqueues an event datagram to notify the peer VM attached to
+ *      the given queue pair handle about attach/detach event by the
+ *      given VM.
+ *
+ * Results:
+ *      Payload size of datagram enqueued on success, error code otherwise.
+ *
+ * Side effects:
+ *      Memory is allocated.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+QueuePairNotifyPeer(Bool attach,       // IN: attach or detach?
+                    VMCIHandle handle, // IN
+                    VMCIId myId,       // IN
+                    VMCIId peerId)     // IN: CID of VM to notify
+{
+   int rv;
+   VMCIEventMsg *eMsg;
+   VMCIEventPayload_QP *evPayload;
+   char buf[sizeof *eMsg + sizeof *evPayload];
+
+   if (VMCI_HANDLE_INVALID(handle) || myId == VMCI_INVALID_ID ||
+       peerId == VMCI_INVALID_ID) {
+      return VMCI_ERROR_INVALID_ARGS;
+   }
+
+   /*
+    * Notification message contains: queue pair handle and
+    * attaching/detaching VM's context id.
+    */
+
+   eMsg = (VMCIEventMsg *)buf;
+
+   /*
+    * In VMCIContext_EnqueueDatagram() we enforce an upper limit on the number
+    * of pending events from the hypervisor to a given VM; otherwise a rogue VM
+    * could do an arbitrary number of attach and detach operations, causing
+    * memory pressure in the host kernel.
+    */
+
+   /* Clear out any garbage. */
+   memset(eMsg, 0, sizeof buf);
+
+   eMsg->hdr.dst = VMCI_MAKE_HANDLE(peerId, VMCI_EVENT_HANDLER);
+   eMsg->hdr.src = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+                                    VMCI_CONTEXT_RESOURCE_ID);
+   eMsg->hdr.payloadSize = sizeof *eMsg + sizeof *evPayload - sizeof eMsg->hdr;
+   eMsg->eventData.event = attach ? VMCI_EVENT_QP_PEER_ATTACH :
+                                    VMCI_EVENT_QP_PEER_DETACH;
+   evPayload = VMCIEventMsgPayload(eMsg);
+   evPayload->handle = handle;
+   evPayload->peerId = myId;
+
+   rv = VMCIDatagram_Dispatch(VMCI_HYPERVISOR_CONTEXT_ID, (VMCIDatagram *)eMsg,
+                              FALSE);
+   if (rv < VMCI_SUCCESS) {
+      VMCI_WARNING((LGPFX"Failed to enqueue QueuePair %s event datagram for "
+                    "context (ID=0x%x).\n", attach ? "ATTACH" : "DETACH",
+                    peerId));
+   }
+
+   return rv;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * VMCIQueuePairAllocHostWork --
+ *
+ *    This function implements the kernel API for allocating a queue
+ *    pair.
+ *
+ * Results:
+ *     VMCI_SUCCESS on success and appropriate failure code otherwise.
+ *
+ * Side effects:
+ *     May allocate memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+VMCIQueuePairAllocHostWork(VMCIHandle *handle,           // IN/OUT
+                           VMCIQueue **produceQ,         // OUT
+                           uint64 produceSize,           // IN
+                           VMCIQueue **consumeQ,         // OUT
+                           uint64 consumeSize,           // IN
+                           VMCIId peer,                  // IN
+                           uint32 flags,                 // IN
+                           VMCIPrivilegeFlags privFlags) // IN
+{
+   VMCIContext *context;
+   QPBrokerEntry *entry;
+   int result;
+   Bool swap;
+
+   if (VMCI_HANDLE_INVALID(*handle)) {
+      VMCIId resourceID = VMCIResource_GetID(VMCI_HOST_CONTEXT_ID);
+      if (resourceID == VMCI_INVALID_ID) {
+         return VMCI_ERROR_NO_HANDLE;
+      }
+      *handle = VMCI_MAKE_HANDLE(VMCI_HOST_CONTEXT_ID, resourceID);
+   }
+
+   context = VMCIContext_Get(VMCI_HOST_CONTEXT_ID);
+   ASSERT(context);
+
+   entry = NULL;
+   VMCIQPBroker_Lock();
+   result = VMCIQPBrokerAllocInt(*handle, peer, flags, privFlags, produceSize,
+                                 consumeSize, NULL, context, &entry, &swap);
+   if (result == VMCI_SUCCESS) {
+      if (swap) {
+         /*
+          * If this is a local queue pair, the attacher will swap around produce
+          * and consume queues.
+          */
+
+         *produceQ = entry->consumeQ;
+         *consumeQ = entry->produceQ;
+      } else {
+         *produceQ = entry->produceQ;
+         *consumeQ = entry->consumeQ;
+      }
+   } else {
+      *handle = VMCI_INVALID_HANDLE;
+      VMCI_DEBUG_LOG(4, (LGPFX"queue pair broker failed to alloc (result=%d).\n",
+                         result));
+   }
+   VMCIQPBroker_Unlock();
+   VMCIContext_Release(context);
+   return result;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * VMCIQueuePairDetachHostWork --
+ *
+ *    This function implements the host kernel API for detaching from
+ *    a queue pair.
+ *
+ * Results:
+ *     VMCI_SUCCESS on success and appropriate failure code otherwise.
+ *
+ * Side effects:
+ *     May deallocate memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+VMCIQueuePairDetachHostWork(VMCIHandle handle) // IN
+{
+   int result;
+   VMCIContext *context;
+
+   context = VMCIContext_Get(VMCI_HOST_CONTEXT_ID);
+
+   VMCIQPBroker_Lock();
+   result = VMCIQPBroker_Detach(handle, context);
+   VMCIQPBroker_Unlock();
+
+   VMCIContext_Release(context);
+   return result;
 }
 
 
@@ -623,12 +908,14 @@ VMCIQPBrokerAllocInt(VMCIHandle handle,             // IN
                      uint64 consumeSize,            // IN
                      QueuePairPageStore *pageStore, // IN/OUT
                      VMCIContext *context,          // IN: Caller
-                     QPBrokerEntry **ent)           // OUT
+                     QPBrokerEntry **ent,           // OUT
+                     Bool *swap)                    // OUT: swap queues?
 {
-   QPBrokerEntry *entry = NULL;
-   int result;
    const VMCIId contextId = VMCIContext_GetId(context);
+   Bool create;
+   QPBrokerEntry *entry;
    Bool isLocal = flags & VMCI_QPFLAG_LOCAL;
+   int result;
 
    if (VMCI_HANDLE_INVALID(handle) ||
        (flags & ~VMCI_QP_ALL_FLAGS) ||
@@ -640,20 +927,9 @@ VMCIQPBrokerAllocInt(VMCIHandle handle,             // IN
       return VMCI_ERROR_INVALID_ARGS;
    }
 
-#ifdef VMKERNEL
-   if (!pageStore || (!pageStore->shared && !isLocal)) {
-      return VMCI_ERROR_INVALID_ARGS;
-   }
-#else
-   /*
-    * On hosted, pageStore can be NULL if the caller doesn't want the
-    * information
-    */
    if (pageStore && !VMCI_QP_PAGESTORE_IS_WELLFORMED(pageStore)) {
       return VMCI_ERROR_INVALID_ARGS;
    }
-#endif // VMKERNEL
-
 
    /*
     * In the initial argument check, we ensure that non-vmkernel hosts
@@ -666,354 +942,416 @@ VMCIQPBrokerAllocInt(VMCIHandle handle,             // IN
       VMCI_DEBUG_LOG(4, (LGPFX"Context (ID=0x%x) already attached to queue pair "
                          "(handle=0x%x:0x%x).\n",
                          contextId, handle.context, handle.resource));
-      result = VMCI_ERROR_ALREADY_EXISTS;
-      goto out;
+      return VMCI_ERROR_ALREADY_EXISTS;
    }
 
    entry = (QPBrokerEntry *)QueuePairList_FindEntry(&qpBrokerList, handle);
-   if (!entry) { /* Create case. */
-      /*
-       * Do not create if the caller asked not to.
-       */
+   if (!entry) {
+      create = TRUE;
+      result = VMCIQPBrokerCreate(handle, peer, flags, privFlags, produceSize,
+                                  consumeSize, pageStore, context, ent);
+   } else {
+      create = FALSE;
+      result = VMCIQPBrokerAttach(entry, peer, flags, privFlags, produceSize,
+                                  consumeSize, pageStore, context, ent);
+   }
 
-      if (flags & VMCI_QPFLAG_ATTACH_ONLY) {
-         result = VMCI_ERROR_NOT_FOUND;
-         goto out;
-      }
+   if (swap) {
+      *swap = (contextId == VMCI_HOST_CONTEXT_ID) && !(create && isLocal);
+   }
 
-      /*
-       * Creator's context ID should match handle's context ID or the creator
-       * must allow the context in handle's context ID as the "peer".
-       */
+   return result;
+}
 
-      if (handle.context != contextId && handle.context != peer) {
-         result = VMCI_ERROR_NO_ACCESS;
-         goto out;
-      }
 
-      /*
-       * Check if we should allow this QueuePair connection.
-       */
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIQPBrokerCreate --
+ *
+ *      The first endpoint issuing a queue pair allocation will create the state
+ *      of the queue pair in the queue pair broker.
+ *
+ *      If the creator is a guest, it will associate a VMX virtual address range
+ *      with the queue pair as specified by the pageStore. For compatibility with
+ *      older VMX'en, that would use a separate step to set the VMX virtual
+ *      address range, the virtual address range can be registered later using
+ *      VMCIQPBroker_SetPageStore. In that case, a pageStore of NULL should be
+ *      used.
+ *
+ *      If the creator is the host, a pageStore of NULL should be used as well,
+ *      since the host is not able to supply a page store for the queue pair.
+ *
+ *      For older VMX and host callers, the queue pair will be created in the
+ *      VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
+ *      created in the VMCIQPB_CREATED_MEM state.
+ *
+ * Results:
+ *      VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ *      Memory will be allocated, and pages may be pinned.
+ *
+ *-----------------------------------------------------------------------------
+ */
 
-      if (QueuePairDenyConnection(contextId, peer)) {
-         result = VMCI_ERROR_NO_ACCESS;
-         goto out;
-      }
+static int
+VMCIQPBrokerCreate(VMCIHandle handle,             // IN
+                   VMCIId peer,                   // IN
+                   uint32 flags,                  // IN
+                   VMCIPrivilegeFlags privFlags,  // IN
+                   uint64 produceSize,            // IN
+                   uint64 consumeSize,            // IN
+                   QueuePairPageStore *pageStore, // IN
+                   VMCIContext *context,          // IN: Caller
+                   QPBrokerEntry **ent)           // OUT
+{
+   QPBrokerEntry *entry = NULL;
+   const VMCIId contextId = VMCIContext_GetId(context);
+   Bool isLocal = flags & VMCI_QPFLAG_LOCAL;
+   int result;
 
-      entry = VMCI_AllocKernelMem(sizeof *entry, VMCI_MEMORY_ATOMIC);
-      if (!entry) {
-         result = VMCI_ERROR_NO_MEM;
-         goto out;
-      }
+   /*
+    * Do not create if the caller asked not to.
+    */
 
-      memset(entry, 0, sizeof *entry);
-      entry->qp.handle = handle;
-      entry->qp.peer = peer;
-      entry->qp.flags = flags;
-      entry->qp.produceSize = produceSize;
-      entry->qp.consumeSize = consumeSize;
-      entry->qp.refCount = 1;
-      entry->createId = contextId;
-      entry->attachId = VMCI_INVALID_ID;
-      entry->pageStoreSet = FALSE;
-      entry->allowAttach = TRUE;
-      entry->requireTrustedAttach =
-         (context->privFlags & VMCI_PRIVILEGE_FLAG_RESTRICTED) ? TRUE : FALSE;
-      entry->createdByTrusted =
-         (privFlags & VMCI_PRIVILEGE_FLAG_TRUSTED) ? TRUE : FALSE;
+   if (flags & VMCI_QPFLAG_ATTACH_ONLY) {
+      return VMCI_ERROR_NOT_FOUND;
+   }
 
-#ifndef VMKERNEL
-      {
-         uint64 numProducePages;
-         uint64 numConsumePages;
-
-         entry->produceQ = VMCIHost_AllocQueue(produceSize);
-         if (entry->produceQ == NULL) {
-            result = VMCI_ERROR_NO_MEM;
-            goto errorDealloc;
-         }
+   /*
+    * Creator's context ID should match handle's context ID or the creator
+    * must allow the context in handle's context ID as the "peer".
+    */
 
-         entry->consumeQ = VMCIHost_AllocQueue(consumeSize);
-         if (entry->consumeQ == NULL) {
-            result = VMCI_ERROR_NO_MEM;
-            goto errorDealloc;
-         }
+   if ((handle.context != contextId && handle.context != peer) ||
+       QueuePairDenyConnection(contextId, peer)) {
+      return VMCI_ERROR_NO_ACCESS;
+   }
 
-         entry->attachInfo = VMCI_AllocKernelMem(sizeof *entry->attachInfo,
-                                                 VMCI_MEMORY_NORMAL);
-         if (entry->attachInfo == NULL) {
-            result = VMCI_ERROR_NO_MEM;
-            goto errorDealloc;
-         }
-         memset(entry->attachInfo, 0, sizeof *entry->attachInfo);
+   /*
+    * Check if we should allow this QueuePair connection.
+    */
 
-         VMCI_InitQueueMutex(entry->produceQ, entry->consumeQ);
+   if (QueuePairDenyConnection(contextId, peer)) {
+      return VMCI_ERROR_NO_ACCESS;
+   }
 
-         numProducePages = CEILING(produceSize, PAGE_SIZE) + 1;
-         numConsumePages = CEILING(consumeSize, PAGE_SIZE) + 1;
+   entry = VMCI_AllocKernelMem(sizeof *entry, VMCI_MEMORY_ATOMIC);
+   if (!entry) {
+      return VMCI_ERROR_NO_MEM;
+   }
 
-         entry->attachInfo->numProducePages = numProducePages;
-         entry->attachInfo->numConsumePages = numConsumePages;
-      }
-#endif /* !VMKERNEL */
+   memset(entry, 0, sizeof *entry);
+   entry->qp.handle = handle;
+   entry->qp.peer = peer;
+   entry->qp.flags = flags;
+   entry->qp.produceSize = produceSize;
+   entry->qp.consumeSize = consumeSize;
+   entry->qp.refCount = 1;
+   entry->createId = contextId;
+   entry->attachId = VMCI_INVALID_ID;
+   entry->state = VMCIQPB_NEW;
+   entry->requireTrustedAttach =
+      (context->privFlags & VMCI_PRIVILEGE_FLAG_RESTRICTED) ? TRUE : FALSE;
+   entry->createdByTrusted =
+      (privFlags & VMCI_PRIVILEGE_FLAG_TRUSTED) ? TRUE : FALSE;
+   entry->vmciPageFiles = FALSE;
+   entry->produceQ = VMCIHost_AllocQueue(produceSize);
+   if (entry->produceQ == NULL) {
+      result = VMCI_ERROR_NO_MEM;
+      goto error;
+   }
 
-      VMCIList_InitEntry(&entry->qp.listItem);
+   entry->consumeQ = VMCIHost_AllocQueue(consumeSize);
+   if (entry->consumeQ == NULL) {
+      result = VMCI_ERROR_NO_MEM;
+      goto error;
+   }
 
-      QueuePairList_AddEntry(&qpBrokerList, &entry->qp);
-      result = VMCI_SUCCESS_QUEUEPAIR_CREATE;
-   } else { /* Attach case. */
+   VMCI_InitQueueMutex(entry->produceQ, entry->consumeQ);
 
-      /*
-       * Check for failure conditions.
-       */
+   VMCIList_InitEntry(&entry->qp.listItem);
 
-      if (isLocal) {
-         if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
-             contextId != entry->createId) {
-            result = VMCI_ERROR_INVALID_ARGS;
-            goto out;
-         }
-      } else if (contextId == entry->createId || contextId == entry->attachId) {
-         result = VMCI_ERROR_ALREADY_EXISTS;
-         goto out;
+   if (isLocal) {
+      ASSERT(pageStore == NULL);
+
+      entry->localMem = VMCI_AllocKernelMem(QPE_NUM_PAGES(entry->qp) * PAGE_SIZE,
+                                            VMCI_MEMORY_NONPAGED);
+      if (entry->localMem == NULL) {
+         result = VMCI_ERROR_NO_MEM;
+         goto error;
       }
+      entry->state = VMCIQPB_CREATED_MEM;
+      entry->produceQ->qHeader = entry->localMem;
+      entry->consumeQ->qHeader =
+         (VMCIQueueHeader *)((uint8 *)entry->localMem +
+             (CEILING(entry->qp.produceSize, PAGE_SIZE) + 1) * PAGE_SIZE);
+      VMCIQueueHeader_Init(entry->produceQ->qHeader, handle);
+      VMCIQueueHeader_Init(entry->consumeQ->qHeader, handle);
+   } else if (pageStore) {
+      ASSERT(entry->createId != VMCI_HOST_CONTEXT_ID || isLocal);
 
       /*
-       * QueuePairs are create/destroy entities.  There's no notion of
-       * disconnecting/re-attaching, so once a queue pair entry has
-       * been attached to, no further attaches are allowed. This
-       * guards against both re-attaching and attaching to a queue
-       * pair that already has two peers.
+       * The VMX already initialized the queue pair headers, so no
+       * need for the kernel side to do that.
        */
 
-      if (!entry->allowAttach) {
-         result = VMCI_ERROR_UNAVAILABLE;
-         goto out;
+      result = VMCIHost_RegisterUserMemory(pageStore,
+                                           entry->produceQ,
+                                           entry->consumeQ);
+      if (result < VMCI_SUCCESS) {
+         goto error;
       }
-      ASSERT(entry->qp.refCount < 2);
-      ASSERT(entry->attachId == VMCI_INVALID_ID);
-
+      entry->state = VMCIQPB_CREATED_MEM;
+   } else {
       /*
-       * If we are attaching from a restricted context then the queuepair
-       * must have been created by a trusted endpoint.
+       * A create without a pageStore may be either a host side create (in which
+       * case we are waiting for the guest side to supply the memory) or an old
+       * style queue pair create (in which case we will expect a set page store
+       * call as the next step).
        */
 
-      if (context->privFlags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
-         if (!entry->createdByTrusted) {
-            result = VMCI_ERROR_NO_ACCESS;
-            goto out;
-         }
-      }
+      entry->state = VMCIQPB_CREATED_NO_MEM;
+   }
 
-      /*
-       * If we are attaching to a queuepair that was created by a restricted
-       * context then we must be trusted.
-       */
+   QueuePairList_AddEntry(&qpBrokerList, &entry->qp);
+   if (ent != NULL) {
+      *ent = entry;
+   }
 
-      if (entry->requireTrustedAttach) {
-         if (!(privFlags & VMCI_PRIVILEGE_FLAG_TRUSTED)) {
-            result = VMCI_ERROR_NO_ACCESS;
-            goto out;
-         }
-      }
+   ASSERT(!VMCIHandleArray_HasEntry(context->queuePairArray, handle));
+   VMCIHandleArray_AppendEntry(&context->queuePairArray, handle);
 
-      /*
-       * If the creator specifies VMCI_INVALID_ID in "peer" field, access
-       * control check is not performed.
-       */
+   return VMCI_SUCCESS;
 
-      if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != contextId) {
-         result = VMCI_ERROR_NO_ACCESS;
-         goto out;
+error:
+   if (entry != NULL) {
+      if (entry->produceQ != NULL) {
+         VMCIHost_FreeQueue(entry->produceQ, produceSize);
       }
+      if (entry->consumeQ != NULL) {
+         VMCIHost_FreeQueue(entry->consumeQ, consumeSize);
+      }
+      VMCI_FreeKernelMem(entry, sizeof *entry);
+   }
+   return result;
+}
 
-#ifndef VMKERNEL
-      /*
-       * VMKernel doesn't need to check the capabilities because the
-       * whole system is installed as the kernel and matching VMX.
-       */
 
-      if (entry->createId == VMCI_HOST_CONTEXT_ID) {
-         /*
-          * Do not attach if the caller doesn't support Host Queue Pairs
-          * and a host created this queue pair.
-          */
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIQPBrokerAttach --
+ *
+ *      The second endpoint issuing a queue pair allocation will attach to the
+ *      queue pair registered with the queue pair broker.
+ *
+ *      If the attacher is a guest, it will associate a VMX virtual address range
+ *      with the queue pair as specified by the pageStore. At this point, the
+ *      already attach host endpoint may start using the queue pair, and an
+ *      attach event is sent to it. For compatibility with older VMX'en, that
+ *      used a separate step to set the VMX virtual address range, the virtual
+ *      address range can be registered later using VMCIQPBroker_SetPageStore. In
+ *      that case, a pageStore of NULL should be used, and the attach event will
+ *      be generated once the actual page store has been set.
+ *
+ *      If the attacher is the host, a pageStore of NULL should be used as well,
+ *      since the page store information is already set by the guest.
+ *
+ *      For new VMX and host callers, the queue pair will be moved to the
+ *      VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
+ *      moved to the VMCIQPB_ATTACHED_NO_MEM state.
+ *
+ * Results:
+ *      VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ *      Memory will be allocated, and pages may be pinned.
+ *
+ *-----------------------------------------------------------------------------
+ */
 
-         if (!VMCIContext_SupportsHostQP(context)) {
-            result = VMCI_ERROR_INVALID_RESOURCE;
-            goto out;
-         }
-      } else if (contextId == VMCI_HOST_CONTEXT_ID) {
-         VMCIContext *createContext;
-         Bool supportsHostQP;
+static int
+VMCIQPBrokerAttach(QPBrokerEntry *entry,          // IN
+                   VMCIId peer,                   // IN
+                   uint32 flags,                  // IN
+                   VMCIPrivilegeFlags privFlags,  // IN
+                   uint64 produceSize,            // IN
+                   uint64 consumeSize,            // IN
+                   QueuePairPageStore *pageStore, // IN/OUT
+                   VMCIContext *context,          // IN: Caller
+                   QPBrokerEntry **ent)           // OUT
+{
+   const VMCIId contextId = VMCIContext_GetId(context);
+   Bool isLocal = flags & VMCI_QPFLAG_LOCAL;
+   int result;
 
-         /*
-          * Do not attach a host to a user created queue pair if that
-          * user doesn't support host queue pair end points.
-          */
+   if (entry->state != VMCIQPB_CREATED_NO_MEM &&
+       entry->state != VMCIQPB_CREATED_MEM) {
+      return VMCI_ERROR_UNAVAILABLE;
+   }
 
-         createContext = VMCIContext_Get(entry->createId);
-         supportsHostQP = VMCIContext_SupportsHostQP(createContext);
-         VMCIContext_Release(createContext);
+   if (isLocal) {
+      if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
+          contextId != entry->createId) {
+         return VMCI_ERROR_INVALID_ARGS;
+      }
+   } else if (contextId == entry->createId || contextId == entry->attachId) {
+      return VMCI_ERROR_ALREADY_EXISTS;
+   }
 
-         if (!supportsHostQP) {
-            result = VMCI_ERROR_INVALID_RESOURCE;
-            goto out;
-         }
+   ASSERT(entry->qp.refCount < 2);
+   ASSERT(entry->attachId == VMCI_INVALID_ID);
+
+   /*
+    * If we are attaching from a restricted context then the queuepair
+    * must have been created by a trusted endpoint.
+    */
+
+   if (context->privFlags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+      if (!entry->createdByTrusted) {
+         return VMCI_ERROR_NO_ACCESS;
       }
-#endif // !VMKERNEL
+   }
 
-      if (entry->qp.produceSize != consumeSize ||
-          entry->qp.consumeSize != produceSize ||
-          entry->qp.flags != (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
-         result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
-         goto out;
+   /*
+    * If we are attaching to a queuepair that was created by a restricted
+    * context then we must be trusted.
+    */
+
+   if (entry->requireTrustedAttach) {
+      if (!(privFlags & VMCI_PRIVILEGE_FLAG_TRUSTED)) {
+         return VMCI_ERROR_NO_ACCESS;
       }
+   }
 
+   /*
+    * If the creator specifies VMCI_INVALID_ID in "peer" field, access
+    * control check is not performed.
+    */
+
+   if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != contextId) {
+      return VMCI_ERROR_NO_ACCESS;
+   }
+
+   /*
+    * Check if we should allow this QueuePair connection.
+    */
+
+   if (QueuePairDenyConnection(contextId, entry->createId)) {
+      return VMCI_ERROR_NO_ACCESS;
+   }
+
+   if (entry->createId == VMCI_HOST_CONTEXT_ID) {
       /*
-       * On VMKERNEL (e.g., ESX) we don't allow an attach until
-       * the page store information has been set.
-       *
-       * However, on hosted products we support an attach to a
-       * QueuePair that hasn't had its page store established yet.  In
-       * fact, that's how a VMX guest will approach a host-created
-       * QueuePair.  After the VMX guest does the attach, VMX will
-       * receive the CREATE status code to indicate that it should
-       * create the page files for the QueuePair contents.  It will
-       * then issue a separate call down to set the page store.  That
-       * will complete the attach case.
+       * Do not attach if the caller doesn't support Host Queue Pairs
+       * and a host created this queue pair.
        */
-      if (vmkernel && !entry->pageStoreSet) {
-         result = VMCI_ERROR_QUEUEPAIR_NOTSET;
-         goto out;
+
+      if (!VMCIContext_SupportsHostQP(context)) {
+         return VMCI_ERROR_INVALID_RESOURCE;
       }
+   } else if (contextId == VMCI_HOST_CONTEXT_ID) {
+      VMCIContext *createContext;
+      Bool supportsHostQP;
 
       /*
-       * Check if we should allow this QueuePair connection.
+       * Do not attach a host to a user created queue pair if that
+       * user doesn't support host queue pair end points.
        */
 
-      if (QueuePairDenyConnection(contextId, entry->createId)) {
-         result = VMCI_ERROR_NO_ACCESS;
-         goto out;
+      createContext = VMCIContext_Get(entry->createId);
+      supportsHostQP = VMCIContext_SupportsHostQP(createContext);
+      VMCIContext_Release(createContext);
+
+      if (!supportsHostQP) {
+         return VMCI_ERROR_INVALID_RESOURCE;
       }
+   }
 
-#ifdef VMKERNEL
-      pageStore->store = entry->store.store;
-#else
-      if (pageStore && entry->pageStoreSet) {
-         ASSERT(entry->producePageFile[0] && entry->consumePageFile[0]);
-         if (pageStore->producePageFileSize < sizeof entry->consumePageFile) {
-            result = VMCI_ERROR_NO_MEM;
-            goto out;
-         }
-         if (pageStore->consumePageFileSize < sizeof entry->producePageFile) {
-            result = VMCI_ERROR_NO_MEM;
-            goto out;
-         }
+   if (entry->qp.produceSize != consumeSize ||
+       entry->qp.consumeSize != produceSize ||
+       entry->qp.flags != (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
+      return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+   }
 
-         if (pageStore->user) {
-            if (VMCI_CopyToUser(pageStore->producePageFile,
-                                entry->consumePageFile,
-                                sizeof entry->consumePageFile)) {
-               result = VMCI_ERROR_GENERIC;
-               goto out;
-            }
+   if (contextId != VMCI_HOST_CONTEXT_ID) {
+      /*
+       * If a guest attached to a queue pair, it will supply the backing memory.
+       * If this is a pre NOVMVM vmx, the backing memory will be supplied by
+       * calling VMCIQPBroker_SetPageStore() following the return of the
+       * VMCIQPBroker_Alloc() call. If it is a vmx of version NOVMVM or later,
+       * the page store must be supplied as part of the VMCIQPBroker_Alloc call.
+       * In no case may the initially created queue pair already have any
+       * memory associated with it.
+       */
 
-            if (VMCI_CopyToUser(pageStore->consumePageFile,
-                                entry->producePageFile,
-                                sizeof entry->producePageFile)) {
-               result = VMCI_ERROR_GENERIC;
-               goto out;
-            }
-         } else {
-            memcpy(VMCIVA64ToPtr(pageStore->producePageFile),
-                   entry->consumePageFile,
-                   sizeof entry->consumePageFile);
-            memcpy(VMCIVA64ToPtr(pageStore->consumePageFile),
-                   entry->producePageFile,
-                   sizeof entry->producePageFile);
-         }
+      if (entry->state != VMCIQPB_CREATED_NO_MEM) {
+         return VMCI_ERROR_INVALID_ARGS;
       }
-#endif // VMKERNEL
 
-      /*
-       * We only send notification if the other end of the QueuePair
-       * is not the host (in hosted products).  In the case that a
-       * host created the QueuePair, we'll send notification when the
-       * guest issues the SetPageStore() (see next function).  The
-       * reason is that the host can't use the QueuePair until the
-       * SetPageStore() is complete.
-       *
-       * Note that in ESX we always send the notification now
-       * because the host can begin to enqueue immediately.
-       */
+      if (pageStore != NULL) {
+         /*
+          * Patch up host state to point to guest supplied memory. The VMX
+          * already initialized the queue pair headers, so no need for the
+          * kernel side to do that.
+          */
 
-      if (vmkernel || entry->createId != VMCI_HOST_CONTEXT_ID) {
-         result = QueuePairNotifyPeer(TRUE, handle, contextId, entry->createId);
+         result = VMCIHost_RegisterUserMemory(pageStore,
+                                              entry->produceQ,
+                                              entry->consumeQ);
          if (result < VMCI_SUCCESS) {
-            goto out;
+            return result;
          }
+         entry->state = VMCIQPB_ATTACHED_MEM;
+      } else {
+         entry->state = VMCIQPB_ATTACHED_NO_MEM;
       }
+   } else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
+      /*
+       * The host side is attempting to attach to a queue pair that doesn't have
+       * any memory associated with it. This must be a pre NOVMVM vmx that hasn't
+       * set the page store information yet, or a quiesced VM.
+       */
 
-      entry->attachId = contextId;
-      entry->qp.refCount++;
-      entry->allowAttach = FALSE;
-
+      return VMCI_ERROR_UNAVAILABLE;
+   } else {
       /*
-       * Default response to an attach is _ATTACH.  However, if a host
-       * created the QueuePair then we're a guest (because
-       * host-to-host isn't supported).  And thus, the guest's VMX
-       * needs to create the backing for the port.  So, we send up a
-       * _CREATE response.
+       * The host side has successfully attached to a queue pair.
        */
+      entry->state = VMCIQPB_ATTACHED_MEM;
+   }
 
-      if (!vmkernel && entry->createId == VMCI_HOST_CONTEXT_ID) {
-         result = VMCI_SUCCESS_QUEUEPAIR_CREATE;
-      } else {
-         result = VMCI_SUCCESS_QUEUEPAIR_ATTACH;
+   if (entry->state == VMCIQPB_ATTACHED_MEM) {
+      result = QueuePairNotifyPeer(TRUE, entry->qp.handle, contextId,
+                                   entry->createId);
+      if (result < VMCI_SUCCESS) {
+         VMCI_WARNING((LGPFX"Failed to notify peer (ID=0x%x) of attach to queue "
+                       "pair (handle=0x%x:0x%x).\n", entry->createId,
+                       entry->qp.handle.context, entry->qp.handle.resource));
       }
    }
 
-#ifndef VMKERNEL
-   goto out;
+   entry->attachId = contextId;
+   entry->qp.refCount++;
 
    /*
-    * Cleanup is only necessary on hosted
+    * When attaching to local queue pairs, the context already has
+    * an entry tracking the queue pair, so don't add another one.
     */
 
-errorDealloc:
-   if (entry->produceQ != NULL) {
-      VMCI_FreeKernelMem(entry->produceQ, sizeof *entry->produceQ);
-   }
-   if (entry->consumeQ != NULL) {
-      VMCI_FreeKernelMem(entry->consumeQ, sizeof *entry->consumeQ);
+   if (!isLocal) {
+      VMCIHandleArray_AppendEntry(&context->queuePairArray, entry->qp.handle);
+   } else {
+      ASSERT(VMCIHandleArray_HasEntry(context->queuePairArray,
+                                      entry->qp.handle));
    }
-   if (entry->attachInfo != NULL) {
-      VMCI_FreeKernelMem(entry->attachInfo, sizeof *entry->attachInfo);
+   if (ent != NULL) {
+      *ent = entry;
    }
-   VMCI_FreeKernelMem(entry, sizeof *entry);
-#endif // !VMKERNEL
 
-out:
-   if (result >= VMCI_SUCCESS) {
-      ASSERT(entry);
-      if (ent != NULL) {
-         *ent = entry;
-      }
-
-      /*
-       * When attaching to local queue pairs, the context already has
-       * an entry tracking the queue pair, so don't add another one.
-       */
-
-      if (!isLocal || result == VMCI_SUCCESS_QUEUEPAIR_CREATE) {
-         ASSERT(!VMCIHandleArray_HasEntry(context->queuePairArray, handle));
-         VMCIHandleArray_AppendEntry(&context->queuePairArray, handle);
-      } else {
-         ASSERT(VMCIHandleArray_HasEntry(context->queuePairArray, handle));
-      }
-   }
-   return result;
+   return VMCI_SUCCESS;
 }
 
 
@@ -1022,49 +1360,41 @@ out:
  *
  * VMCIQPBroker_SetPageStore --
  *
- *      The creator of a queue pair uses this to regsiter the page
- *      store for a given queue pair.  Assumes that the queue pair
- *      broker lock is held.
+ *      VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
+ *      step to add the UVAs of the VMX mapping of the queue pair. This function
+ *      provides backwards compatibility with such VMX'en, and takes care of
+ *      registering the page store for a queue pair previously allocated by the
+ *      VMX during create or attach. This function will move the queue pair state
+ *      either from VMCIQBP_CREATED_NO_MEM to VMCIQBP_CREATED_MEM or from
+ *      VMCIQBP_ATTACHED_NO_MEM to VMCIQBP_ATTACHED_MEM. If moving to the
+ *      attached state with memory, the queue pair is ready to be used by the
+ *      host peer, and an attached event will be generated.
+ *
+ *      Assumes that the queue pair broker lock is held.
  *
- *      Note now that sometimes the client that attaches to a queue
- *      pair will set the page store.  This happens on hosted products
- *      because the host doesn't have a mechanism for creating the
- *      backing memory for queue contents.  ESX does and so this is a
- *      moot point there.  For example, note that in
- *      VMCIQPBrokerAllocInt() an attaching guest receives the _CREATE
- *      result code (instead of _ATTACH) on hosted products only, not
- *      on VMKERNEL.
- *
- *      As a result, this routine now always creates the host
- *      information even if the queue pair is only used by guests.  At
- *      the time a guest creates a queue pair it doesn't know if a
- *      host or guest will attach.  So, the host information always
- *      has to be created.
+ *      This function is only used by the hosted platform, since there is no
+ *      issue with backwards compatibility for vmkernel.
  *
  * Results:
- *      Success or failure.
+ *      VMCI_SUCCESS on success, appropriate error code otherwise.
  *
  * Side effects:
- *      None.
+ *      Pages may get pinned.
  *
  *-----------------------------------------------------------------------------
  */
 
 int
-VMCIQPBroker_SetPageStore(VMCIHandle handle,             // IN
-                          QueuePairPageStore *pageStore, // IN
-                          VMCIContext *context)          // IN: Caller
+VMCIQPBroker_SetPageStore(VMCIHandle handle,      // IN
+                          VA64 produceUVA,        // IN
+                          VA64 consumeUVA,        // IN
+                          VMCIContext *context)   // IN: Caller
 {
    QPBrokerEntry *entry;
    int result;
    const VMCIId contextId = VMCIContext_GetId(context);
-#ifndef VMKERNEL
-   QueuePairPageStore normalizedPageStore;
-#endif
 
-   if (VMCI_HANDLE_INVALID(handle) || !pageStore ||
-       !VMCI_QP_PAGESTORE_IS_WELLFORMED(pageStore) ||
-       !context || contextId == VMCI_INVALID_ID) {
+   if (VMCI_HANDLE_INVALID(handle) || !context || contextId == VMCI_INVALID_ID) {
       return VMCI_ERROR_INVALID_ARGS;
    }
 
@@ -1076,18 +1406,14 @@ VMCIQPBroker_SetPageStore(VMCIHandle handle,             // IN
       goto out;
    }
 
-#ifndef VMKERNEL
    /*
-    * If the client supports Host QueuePairs then it must provide the
-    * UVA's of the mmap()'d files backing the QueuePairs.
+    * We only support guest to host queue pairs, so the VMX must
+    * supply UVAs for the mapped page files.
     */
 
-   if (VMCIContext_SupportsHostQP(context) &&
-       (pageStore->producePageUVA == 0 ||
-        pageStore->consumePageUVA == 0)) {
+   if (produceUVA == 0 || consumeUVA == 0) {
       return VMCI_ERROR_INVALID_ARGS;
    }
-#endif // !VMKERNEL
 
    entry = (QPBrokerEntry *)QueuePairList_FindEntry(&qpBrokerList, handle);
    if (!entry) {
@@ -1108,121 +1434,42 @@ VMCIQPBroker_SetPageStore(VMCIHandle handle,             // IN
       result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
       goto out;
    }
-   if (entry->pageStoreSet) {
+
+   if (entry->state != VMCIQPB_CREATED_NO_MEM &&
+       entry->state != VMCIQPB_ATTACHED_NO_MEM) {
       result = VMCI_ERROR_UNAVAILABLE;
       goto out;
    }
-#ifdef VMKERNEL
-   entry->store = *pageStore;
-#else
-   /*
-    * Normalize the page store information from the point of view of
-    * the VMX process with respect to the QueuePair.  If VMX has
-    * attached to a host-created QueuePair and is passing down
-    * PageStore information then we must switch the produce/consume
-    * queue information before applying it to the QueuePair.
-    *
-    * In other words, the QueuePair structure (entry->state) is
-    * oriented with respect to the host that created it.  However, VMX
-    * is sending down information relative to its view of the world
-    * which is opposite of the host's.
-    */
 
-   if (entry->createId == contextId) {
-      normalizedPageStore.producePageFile = pageStore->producePageFile;
-      normalizedPageStore.consumePageFile = pageStore->consumePageFile;
-      normalizedPageStore.producePageFileSize = pageStore->producePageFileSize;
-      normalizedPageStore.consumePageFileSize = pageStore->consumePageFileSize;
-      normalizedPageStore.producePageUVA = pageStore->producePageUVA;
-      normalizedPageStore.consumePageUVA = pageStore->consumePageUVA;
-   } else {
-      normalizedPageStore.producePageFile = pageStore->consumePageFile;
-      normalizedPageStore.consumePageFile = pageStore->producePageFile;
-      normalizedPageStore.producePageFileSize = pageStore->consumePageFileSize;
-      normalizedPageStore.consumePageFileSize = pageStore->producePageFileSize;
-      normalizedPageStore.producePageUVA = pageStore->consumePageUVA;
-      normalizedPageStore.consumePageUVA = pageStore->producePageUVA;
-   }
-
-   if (normalizedPageStore.producePageFileSize > sizeof entry->producePageFile) {
-      result = VMCI_ERROR_NO_MEM;
-       goto out;
-   }
-   if (normalizedPageStore.consumePageFileSize > sizeof entry->consumePageFile) {
-      result = VMCI_ERROR_NO_MEM;
+   result = VMCIHost_GetUserMemory(produceUVA, consumeUVA,
+                                   entry->produceQ, entry->consumeQ);
+   if (result < VMCI_SUCCESS) {
       goto out;
    }
-   if (pageStore->user) {
-      if (VMCI_CopyFromUser(entry->producePageFile,
-                            normalizedPageStore.producePageFile,
-                            (size_t)normalizedPageStore.producePageFileSize)) {
-         result = VMCI_ERROR_GENERIC;
-         goto out;
-      }
 
-      if (VMCI_CopyFromUser(entry->consumePageFile,
-                            normalizedPageStore.consumePageFile,
-                            (size_t)normalizedPageStore.consumePageFileSize)) {
-         result = VMCI_ERROR_GENERIC;
-         goto out;
-      }
-   } else {
-      memcpy(entry->consumePageFile,
-             VMCIVA64ToPtr(normalizedPageStore.consumePageFile),
-             (size_t)normalizedPageStore.consumePageFileSize);
-      memcpy(entry->producePageFile,
-             VMCIVA64ToPtr(normalizedPageStore.producePageFile),
-             (size_t)normalizedPageStore.producePageFileSize);
+   result = VMCIHost_MapQueueHeaders(entry->produceQ, entry->consumeQ);
+   if (result < VMCI_SUCCESS) {
+     VMCIHost_ReleaseUserMemory(entry->produceQ, entry->consumeQ);
+     goto out;
    }
 
-   /*
-    * Copy the data into the attachInfo structure
-    */
-
-   memcpy(&entry->attachInfo->producePageFile[0],
-          &entry->producePageFile[0],
-          (size_t)normalizedPageStore.producePageFileSize);
-   memcpy(&entry->attachInfo->consumePageFile[0],
-          &entry->consumePageFile[0],
-          (size_t)normalizedPageStore.consumePageFileSize);
-
-   /*
-    * NOTE: The UVAs that follow may be 0.  In this case an older VMX has
-    * issued a SetPageFile call without mapping the backing files for the
-    * queue contents.  The result of this is that the queue pair cannot
-    * be connected by host.
-    */
-
-   entry->attachInfo->produceBuffer = normalizedPageStore.producePageUVA;
-   entry->attachInfo->consumeBuffer = normalizedPageStore.consumePageUVA;
-
-   if (VMCIContext_SupportsHostQP(context)) {
-      result = VMCIHost_GetUserMemory(entry->attachInfo,
-                                      entry->produceQ,
-                                      entry->consumeQ);
-
-      if (result < VMCI_SUCCESS) {
-         goto out;
-      }
+   if (entry->state == VMCIQPB_CREATED_NO_MEM) {
+      entry->state = VMCIQPB_CREATED_MEM;
+   } else {
+      ASSERT(entry->state == VMCIQPB_ATTACHED_NO_MEM);
+      entry->state = VMCIQPB_ATTACHED_MEM;
    }
-#endif // VMKERNEL
+   entry->vmciPageFiles = TRUE;
 
-   /*
-    * In the event that the QueuePair was created by a host in a
-    * hosted kernel, then we send notification now that the QueuePair
-    * contents backing files are attached to the Queues.  Note in
-    * VMCIQPBrokerAllocInt(), above, we skipped this step when the
-    * creator was a host (on hosted).
-    */
-
-   if (!vmkernel && entry->createId == VMCI_HOST_CONTEXT_ID) {
+   if (entry->state == VMCIQPB_ATTACHED_MEM) {
       result = QueuePairNotifyPeer(TRUE, handle, contextId, entry->createId);
       if (result < VMCI_SUCCESS) {
-         goto out;
+         VMCI_WARNING((LGPFX"Failed to notify peer (ID=0x%x) of attach to queue "
+                       "pair (handle=0x%x:0x%x).\n", entry->createId,
+                       entry->qp.handle.context, entry->qp.handle.resource));
       }
    }
 
-   entry->pageStoreSet = TRUE;
    result = VMCI_SUCCESS;
 
 out:
@@ -1235,46 +1482,42 @@ out:
  *
  * VMCIQPBroker_Detach --
  *
- *      Informs the VMCI queue pair broker that a context has detached
- *      from a given QueuePair handle.  Assumes that the queue pair
- *      broker lock is held.  If the "detach" input parameter is
- *      FALSE, the queue pair entry is not removed from the list of
- *      queue pairs registered with the queue pair broker, and the
- *      context is not detached from the given handle.  If "detach" is
- *      TRUE, the detach operation really happens.  With "detach" set
- *      to FALSE, the caller can query if the "actual" detach
- *      operation would succeed or not.  The return value from this
- *      function remains the same irrespective of the value of the
- *      boolean "detach".
- *
- *      Also note that the result code for a VM detaching from a
- *      VM-host queue pair is always VMCI_SUCCESS_LAST_DETACH.  This
- *      is so that VMX can unlink the backing files.  On the host side
- *      the files are either locked (Mac OS/Linux) or the contents are
- *      saved (Windows).
+ *      The main entry point for detaching from a queue pair registered with the
+ *      queue pair broker. If more than one endpoint is attached to the queue
+ *      pair, the first endpoint will mainly decrement a reference count and
+ *      generate a notification to its peer. The last endpoint will clean up
+ *      the queue pair state registered with the broker.
+ *
+ *      When a guest endpoint detaches, it will unmap and unregister the guest
+ *      memory backing the queue pair. If the host is still attached, it will
+ *      no longer be able to access the queue pair content.
+ *
+ *      If the queue pair is already in a state where there is no memory
+ *      registered for the queue pair (any *_NO_MEM state), it will transition to
+ *      the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen if a guest
+ *      endpoint is the first of two endpoints to detach. If the host endpoint is
+ *      the first out of two to detach, the queue pair will move to the
+ *      VMCIQPB_SHUTDOWN_MEM state.
  *
  * Results:
- *      Success or failure.
+ *      VMCI_SUCCESS on success, appropriate error code otherwise.
  *
  * Side effects:
- *      None.
+ *      Memory may be freed, and pages may be unpinned.
  *
  *-----------------------------------------------------------------------------
  */
 
 int
 VMCIQPBroker_Detach(VMCIHandle  handle,   // IN
-                    VMCIContext *context, // IN
-                    Bool detach)          // IN: Really detach?
+                    VMCIContext *context) // IN
 {
    QPBrokerEntry *entry;
-   int result;
    const VMCIId contextId = VMCIContext_GetId(context);
    VMCIId peerId;
    Bool isLocal = FALSE;
 
-   if (VMCI_HANDLE_INVALID(handle) ||
-       !context || contextId == VMCI_INVALID_ID) {
+   if (VMCI_HANDLE_INVALID(handle) || !context || contextId == VMCI_INVALID_ID) {
       return VMCI_ERROR_INVALID_ARGS;
    }
 
@@ -1282,358 +1525,276 @@ VMCIQPBroker_Detach(VMCIHandle  handle,   // IN
       VMCI_DEBUG_LOG(4, (LGPFX"Context (ID=0x%x) not attached to queue pair "
                          "(handle=0x%x:0x%x).\n",
                          contextId, handle.context, handle.resource));
-      result = VMCI_ERROR_NOT_FOUND;
-      goto out;
+      return VMCI_ERROR_NOT_FOUND;
    }
 
    entry = (QPBrokerEntry *)QueuePairList_FindEntry(&qpBrokerList, handle);
    if (!entry) {
-      result = VMCI_ERROR_NOT_FOUND;
-      goto out;
+      VMCI_DEBUG_LOG(4, (LGPFX"Context (ID=0x%x) reports being attached to queue pair "
+                         "(handle=0x%x:0x%x) that isn't present in broker.\n",
+                         contextId, handle.context, handle.resource));
+      return VMCI_ERROR_NOT_FOUND;
    }
 
-   isLocal = entry->qp.flags & VMCI_QPFLAG_LOCAL;
-
-   ASSERT(vmkernel || !isLocal);
-
    if (contextId != entry->createId && contextId != entry->attachId) {
-      result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
-      goto out;
+      return VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
    }
 
    if (contextId == entry->createId) {
       peerId = entry->attachId;
-   } else {
-      peerId = entry->createId;
-   }
-
-   if (!detach) {
-      /* Do not update the queue pair entry. */
-
-      ASSERT(entry->qp.refCount == 1 || entry->qp.refCount == 2);
-
-      if (entry->qp.refCount == 1 ||
-          (!vmkernel && peerId == VMCI_HOST_CONTEXT_ID)) {
-         result = VMCI_SUCCESS_LAST_DETACH;
-      } else {
-         result = VMCI_SUCCESS;
-      }
-
-      goto out;
-   }
-
-   if (contextId == entry->createId) {
       entry->createId = VMCI_INVALID_ID;
    } else {
+      peerId = entry->createId;
       entry->attachId = VMCI_INVALID_ID;
    }
    entry->qp.refCount--;
 
-#ifdef _WIN32
-   /*
-    * If the caller detaching is a usermode process (e.g., VMX), then
-    * we must detach the mappings now.  On Windows.
-    *
-    * VMCIHost_SaveProduceQ() will save the guest's produceQ so that
-    * the host can pick up the data after the guest is gone.
-    *
-    * We save the ProduceQ whenever the guest detaches (even if VMX
-    * continues to run).  If we didn't do this, then we'd have the
-    * problem of finding and releasing the memory when the client goes
-    * away because we won't be able to find the client in the list of
-    * QueuePair entries.  The detach code path (has already) set the
-    * contextId for detached end-point to VMCI_INVALID_ID.  (See just
-    * a few lines above where that happens.)  Sure, we could fix that,
-    * and then we could look at all entries finding ones where the
-    * contextId of either creator or attach matches the going away
-    * context's Id.  But, if we just copy out the guest's produceQ
-    * -always- then we reduce the logic changes elsewhere.
-    */
+   isLocal = entry->qp.flags & VMCI_QPFLAG_LOCAL;
 
-   /*
-    * Some example paths through this code:
-    *
-    * Guest-to-guest: the code will call ReleaseUserMemory() once when
-    * the first guest detaches.  And then a second time when the
-    * second guest detaches.  That's OK.  Nobody is using the user
-    * memory (because there is no host attached) and
-    * ReleaseUserMemory() tracks its resources.
-    *
-    * Host detaches first: the code will not call anything because
-    * contextId == VMCI_HOST_CONTEXT_ID and because (in the second if
-    * () clause below) refCount > 0.
-    *
-    * Guest detaches second: the first if clause, below, will not be
-    * taken because refCount is already 0.  The second if () clause
-    * (below) will be taken and it will simply call
-    * ReleaseUserMemory().
-    *
-    * Guest detaches first: the code will call SaveProduceQ().
-    *
-    * Host detaches second: the code will call ReleaseUserMemory()
-    * which will free the kernel allocated Q memory.
-    */
+   if (contextId != VMCI_HOST_CONTEXT_ID) {
+      int result;
+
+      ASSERT(entry->state != VMCIQPB_SHUTDOWN_NO_MEM);
+      ASSERT(!isLocal);
 
-   if (entry->pageStoreSet &&
-       contextId != VMCI_HOST_CONTEXT_ID &&
-       VMCIContext_SupportsHostQP(context) &&
-       entry->qp.refCount) {
       /*
-       * It's important to pass down produceQ and consumeQ in the
-       * correct order because the produceQ that is to be saved is the
-       * guest's, so we have to be sure that the routine sees the
-       * guest's produceQ as (in this case) the first Q parameter.
+       * Pre-NOVMVM VMX'en may detach from a queue pair before setting the page
+       * store, and in that case there is no user memory to detach from. Also,
+       * more recent VMX'en may detach from a queue pair in the quiesced state.
        */
 
-      if (entry->attachId == VMCI_HOST_CONTEXT_ID) {
-         VMCIHost_SaveProduceQ(entry->attachInfo,
-                               entry->produceQ,
-                               entry->consumeQ,
-                               entry->qp.produceSize);
-      } else if (entry->createId == VMCI_HOST_CONTEXT_ID) {
-         VMCIHost_SaveProduceQ(entry->attachInfo,
-                               entry->consumeQ,
-                               entry->produceQ,
-                               entry->qp.consumeSize);
-      } else {
-         VMCIHost_ReleaseUserMemory(entry->attachInfo,
-                                    entry->produceQ,
-                                    entry->consumeQ);
+      if (entry->state != VMCIQPB_ATTACHED_NO_MEM &&
+          entry->state != VMCIQPB_CREATED_NO_MEM) {
+
+         VMCI_AcquireQueueMutex(entry->produceQ);
+         result = VMCIHost_UnmapQueueHeaders(INVALID_VMCI_GUEST_MEM_ID,
+                                             entry->produceQ,
+                                             entry->consumeQ);
+         if (result < VMCI_SUCCESS) {
+            VMCI_WARNING((LGPFX"Failed to unmap queue headers for queue pair "
+                          "(handle=0x%x:0x%x,result=%d).\n", handle.context,
+                          handle.resource, result));
+         }
+         if (entry->vmciPageFiles) {
+            VMCIHost_ReleaseUserMemory(entry->produceQ, entry->consumeQ);
+         } else {
+            VMCIHost_UnregisterUserMemory(entry->produceQ, entry->consumeQ);
+         }
+         VMCI_ReleaseQueueMutex(entry->produceQ);
       }
    }
-#endif // _WIN32
 
-   if (!entry->qp.refCount) {
+   if (entry->qp.refCount == 0) {
       QueuePairList_RemoveEntry(&qpBrokerList, &entry->qp);
 
-#ifndef VMKERNEL
-      if (entry->pageStoreSet &&
-          VMCIContext_SupportsHostQP(context)) {
-         VMCIHost_ReleaseUserMemory(entry->attachInfo,
-                                    entry->produceQ,
-                                    entry->consumeQ);
-      }
-      if (entry->attachInfo) {
-         VMCI_FreeKernelMem(entry->attachInfo, sizeof *entry->attachInfo);
-      }
-      if (entry->produceQ) {
-         VMCI_FreeKernelMem(entry->produceQ, sizeof *entry->produceQ);
-      }
-      if (entry->consumeQ) {
-         VMCI_FreeKernelMem(entry->consumeQ, sizeof *entry->consumeQ);
+      if (isLocal) {
+         VMCI_FreeKernelMem(entry->localMem, QPE_NUM_PAGES(entry->qp) * PAGE_SIZE);
       }
-#endif // !VMKERNEL
-
+      VMCI_CleanupQueueMutex(entry->produceQ, entry->consumeQ);
+      VMCIHost_FreeQueue(entry->produceQ, entry->qp.produceSize);
+      VMCIHost_FreeQueue(entry->consumeQ, entry->qp.consumeSize);
       VMCI_FreeKernelMem(entry, sizeof *entry);
-      result = VMCI_SUCCESS_LAST_DETACH;
-   } else {
-      /*
-       * XXX: If we ever allow the creator to detach and attach again
-       * to the same queue pair, we need to handle the mapping of the
-       * shared memory region in vmkernel differently. Currently, we
-       * assume that an attaching VM always needs to swap the two
-       * queues.
-       */
 
+      VMCIHandleArray_RemoveEntry(context->queuePairArray, handle);
+   } else {
       ASSERT(peerId != VMCI_INVALID_ID);
       QueuePairNotifyPeer(FALSE, handle, contextId, peerId);
-      if (!vmkernel && peerId == VMCI_HOST_CONTEXT_ID) {
-         result = VMCI_SUCCESS_LAST_DETACH;
+      if (contextId == VMCI_HOST_CONTEXT_ID) {
+         entry->state = VMCIQPB_SHUTDOWN_MEM;
       } else {
-         result = VMCI_SUCCESS;
+         entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
       }
-   }
-
-out:
-   if (result >= VMCI_SUCCESS && detach) {
-      if (!isLocal || result == VMCI_SUCCESS_LAST_DETACH) {
+      if (!isLocal) {
          VMCIHandleArray_RemoveEntry(context->queuePairArray, handle);
       }
    }
-   return result;
+
+   return VMCI_SUCCESS;
 }
 
 
 /*
  *-----------------------------------------------------------------------------
  *
- * QueuePairNotifyPeer --
+ * VMCIQPBroker_Map --
  *
- *      Enqueues an event datagram to notify the peer VM attached to
- *      the given queue pair handle about attach/detach event by the
- *      given VM.
+ *      Establishes the necessary mappings for a queue pair given a
+ *      reference to the queue pair guest memory. This is usually
+ *      called when a guest is unquiesced and the VMX is allowed to
+ *      map guest memory once again.
  *
  * Results:
- *      Payload size of datagram enqueued on success, error code otherwise.
+ *      VMCI_SUCCESS on success, appropriate error code otherwise.
  *
  * Side effects:
- *      Memory is allocated.
+ *      Memory may be allocated, and pages may be pinned.
  *
  *-----------------------------------------------------------------------------
  */
 
 int
-QueuePairNotifyPeer(Bool attach,       // IN: attach or detach?
-                    VMCIHandle handle, // IN
-                    VMCIId myId,       // IN
-                    VMCIId peerId)     // IN: CID of VM to notify
+VMCIQPBroker_Map(VMCIHandle  handle,      // IN
+                 VMCIContext *context,    // IN
+                 VMCIQPGuestMem guestMem) // IN
 {
-   int rv;
-   VMCIEventMsg *eMsg;
-   VMCIEventPayload_QP *evPayload;
-   char buf[sizeof *eMsg + sizeof *evPayload];
+   QPBrokerEntry *entry;
+   const VMCIId contextId = VMCIContext_GetId(context);
+   Bool isLocal = FALSE;
+   int result;
 
-   if (VMCI_HANDLE_INVALID(handle) || myId == VMCI_INVALID_ID ||
-       peerId == VMCI_INVALID_ID) {
+   if (vmkernel) {
+      ASSERT(FALSE);
+      return VMCI_ERROR_UNAVAILABLE;
+   }
+
+   if (VMCI_HANDLE_INVALID(handle) || !context || contextId == VMCI_INVALID_ID) {
       return VMCI_ERROR_INVALID_ARGS;
    }
 
-   /*
-    * Notification message contains: queue pair handle and
-    * attaching/detaching VM's context id.
-    */
+   if (!VMCIHandleArray_HasEntry(context->queuePairArray, handle)) {
+      VMCI_DEBUG_LOG(4, (LGPFX"Context (ID=0x%x) not attached to queue pair "
+                         "(handle=0x%x:0x%x).\n",
+                         contextId, handle.context, handle.resource));
+      return VMCI_ERROR_NOT_FOUND;
+   }
 
-   eMsg = (VMCIEventMsg *)buf;
+   entry = (QPBrokerEntry *)QueuePairList_FindEntry(&qpBrokerList, handle);
+   if (!entry) {
+      VMCI_DEBUG_LOG(4, (LGPFX"Context (ID=0x%x) reports being attached to queue pair "
+                         "(handle=0x%x:0x%x) that isn't present in broker.\n",
+                         contextId, handle.context, handle.resource));
+      return VMCI_ERROR_NOT_FOUND;
+   }
 
-   /*
-    * In VMCIContext_EnqueueDatagram() we enforce the upper limit on number of
-    * pending events from the hypervisor to a given VM otherwise a rogue VM
-    * could do arbitrary number of attached and detaches causing memory
-    * pressure in the host kernel.
-   */
+   if (contextId != entry->createId && contextId != entry->attachId) {
+      return VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+   }
 
-   /* Clear out any garbage. */
-   memset(eMsg, 0, sizeof *eMsg + sizeof *evPayload);
+   isLocal = entry->qp.flags & VMCI_QPFLAG_LOCAL;
 
-   eMsg->hdr.dst = VMCI_MAKE_HANDLE(peerId, VMCI_EVENT_HANDLER);
-   eMsg->hdr.src = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
-                                    VMCI_CONTEXT_RESOURCE_ID);
-   eMsg->hdr.payloadSize = sizeof *eMsg + sizeof *evPayload - sizeof eMsg->hdr;
-   eMsg->eventData.event = attach ? VMCI_EVENT_QP_PEER_ATTACH :
-                                    VMCI_EVENT_QP_PEER_DETACH;
-   evPayload = VMCIEventMsgPayload(eMsg);
-   evPayload->handle = handle;
-   evPayload->peerId = myId;
+   if (contextId != VMCI_HOST_CONTEXT_ID) {
+      QueuePairPageStore pageStore;
 
-   rv = VMCIDatagram_Dispatch(VMCI_HYPERVISOR_CONTEXT_ID, (VMCIDatagram *)eMsg,
-                              FALSE);
-   if (rv < VMCI_SUCCESS) {
-      VMCI_WARNING((LGPFX"Failed to enqueue QueuePair %s event datagram for "
-                    "context (ID=0x%x).\n", attach ? "ATTACH" : "DETACH",
-                    peerId));
+      ASSERT(entry->state == VMCIQPB_CREATED_NO_MEM ||
+             entry->state == VMCIQPB_SHUTDOWN_NO_MEM ||
+             entry->state == VMCIQPB_ATTACHED_NO_MEM);
+      ASSERT(!isLocal);
+
+      pageStore.pages = guestMem;
+      pageStore.len = QPE_NUM_PAGES(entry->qp);
+
+      VMCI_AcquireQueueMutex(entry->produceQ);
+
+      result = VMCIHost_RegisterUserMemory(&pageStore, entry->produceQ, entry->consumeQ);
+      if (result == VMCI_SUCCESS) {
+         /*
+          * Move state from *_NO_MEM to *_MEM.
+          */
+
+         entry->state++;
+
+         ASSERT(entry->state == VMCIQPB_CREATED_MEM ||
+                entry->state == VMCIQPB_SHUTDOWN_MEM ||
+                entry->state == VMCIQPB_ATTACHED_MEM);
+      }
+      VMCI_ReleaseQueueMutex(entry->produceQ);
+   } else {
+      result = VMCI_SUCCESS;
    }
 
-   return rv;
+   return result;
 }
 
-#if !defined(VMKERNEL)
-
 /*
- *----------------------------------------------------------------------
+ *-----------------------------------------------------------------------------
  *
- * VMCIQueuePairAllocHostWork --
+ * VMCIQPBroker_Unmap --
  *
- *    This function implements the kernel API for allocating a queue
- *    pair.
+ *      Removes all references to the guest memory of a given queue pair, and
+ *      will move the queue pair from state *_MEM to *_NO_MEM. It is usually
+ *      called when a VM is being quiesced where access to guest memory should
+ *      be avoided.
  *
  * Results:
- *     VMCI_SUCCESS on succes and appropriate failure code otherwise.
+ *      VMCI_SUCCESS on success, appropriate error code otherwise.
  *
  * Side effects:
- *     May allocate memory.
+ *      Memory may be freed, and pages may be unpinned.
  *
- *----------------------------------------------------------------------
+ *-----------------------------------------------------------------------------
  */
 
-static int
-VMCIQueuePairAllocHostWork(VMCIHandle *handle,           // IN/OUT
-                           VMCIQueue **produceQ,         // OUT
-                           uint64 produceSize,           // IN
-                           VMCIQueue **consumeQ,         // OUT
-                           uint64 consumeSize,           // IN
-                           VMCIId peer,                  // IN
-                           uint32 flags,                 // IN
-                           VMCIPrivilegeFlags privFlags) // IN
+int
+VMCIQPBroker_Unmap(VMCIHandle  handle,   // IN
+                   VMCIContext *context, // IN
+                   VMCIGuestMemID gid)   // IN
 {
-   VMCIContext *context;
-   int result;
    QPBrokerEntry *entry;
+   const VMCIId contextId = VMCIContext_GetId(context);
+   Bool isLocal = FALSE;
 
-   result = VMCI_SUCCESS;
+   if (VMCI_HANDLE_INVALID(handle) || !context || contextId == VMCI_INVALID_ID) {
+      return VMCI_ERROR_INVALID_ARGS;
+   }
 
-   if (VMCI_HANDLE_INVALID(*handle)) {
-      VMCIId resourceID = VMCIResource_GetID(VMCI_HOST_CONTEXT_ID);
-      if (resourceID == VMCI_INVALID_ID) {
-         return VMCI_ERROR_NO_HANDLE;
-      }
-      *handle = VMCI_MAKE_HANDLE(VMCI_HOST_CONTEXT_ID, resourceID);
+   if (!VMCIHandleArray_HasEntry(context->queuePairArray, handle)) {
+      VMCI_DEBUG_LOG(4, (LGPFX"Context (ID=0x%x) not attached to queue pair "
+                         "(handle=0x%x:0x%x).\n",
+                         contextId, handle.context, handle.resource));
+      return VMCI_ERROR_NOT_FOUND;
    }
 
-   context = VMCIContext_Get(VMCI_HOST_CONTEXT_ID);
-   ASSERT(context);
+   entry = (QPBrokerEntry *)QueuePairList_FindEntry(&qpBrokerList, handle);
+   if (!entry) {
+      VMCI_DEBUG_LOG(4, (LGPFX"Context (ID=0x%x) reports being attached to queue pair "
+                         "(handle=0x%x:0x%x) that isn't present in broker.\n",
+                         contextId, handle.context, handle.resource));
+      return VMCI_ERROR_NOT_FOUND;
+   }
 
-   entry = NULL;
-   VMCIQPBroker_Lock();
-   result = VMCIQPBrokerAllocInt(*handle, peer, flags, privFlags, produceSize,
-                                 consumeSize, NULL, context, &entry);
+   if (contextId != entry->createId && contextId != entry->attachId) {
+      return VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+   }
 
-   if (result >= VMCI_SUCCESS) {
-      ASSERT(entry != NULL);
+   isLocal = entry->qp.flags & VMCI_QPFLAG_LOCAL;
 
-      if (entry->createId == VMCI_HOST_CONTEXT_ID) {
-         *produceQ = entry->produceQ;
-         *consumeQ = entry->consumeQ;
-      } else {
-         *produceQ = entry->consumeQ;
-         *consumeQ = entry->produceQ;
-      }
+   if (contextId != VMCI_HOST_CONTEXT_ID) {
+      ASSERT(entry->state != VMCIQPB_CREATED_NO_MEM &&
+             entry->state != VMCIQPB_SHUTDOWN_NO_MEM &&
+             entry->state != VMCIQPB_ATTACHED_NO_MEM);
+      ASSERT(!isLocal);
 
-      result = VMCI_SUCCESS;
-   } else {
-      *handle = VMCI_INVALID_HANDLE;
-      VMCI_DEBUG_LOG(4, (LGPFX"queue pair broker failed to alloc (result=%d).\n",
-                         result));
-   }
+      VMCI_AcquireQueueMutex(entry->produceQ);
 
-   VMCIQPBroker_Unlock();
-   VMCIContext_Release(context);
-   return result;
-}
+      VMCIHost_UnmapQueueHeaders(gid,
+                                 entry->produceQ,
+                                 entry->consumeQ);
 
+      if (!vmkernel) {
+         /*
+          * On hosted, when we unmap queue pairs, the VMX will also
+          * unmap the guest memory, so we invalidate the previously
+          * registered memory. If the queue pair is mapped again at a
+          * later point in time, we will need to reregister the user
+          * memory with a possibly new user VA.
+          */
 
-/*
- *----------------------------------------------------------------------
- *
- * VMCIQueuePairDetachHostWork --
- *
- *    This function implements the host kernel API for detaching from
- *    a queue pair.
- *
- * Results:
- *     VMCI_SUCCESS on success and appropriate failure code otherwise.
- *
- * Side effects:
- *     May deallocate memory.
- *
- *----------------------------------------------------------------------
- */
+         VMCIHost_UnregisterUserMemory(entry->produceQ, entry->consumeQ);
 
-static int
-VMCIQueuePairDetachHostWork(VMCIHandle handle) // IN
-{
-   int result;
-   VMCIContext *context;
+         /*
+          * Move state from *_MEM to *_NO_MEM.
+          */
 
-   context = VMCIContext_Get(VMCI_HOST_CONTEXT_ID);
+         entry->state--;
+      }
 
-   VMCIQPBroker_Lock();
-   result = VMCIQPBroker_Detach(handle, context, TRUE);
-   VMCIQPBroker_Unlock();
+      VMCI_ReleaseQueueMutex(entry->produceQ);
+   }
 
-   VMCIContext_Release(context);
-   return result;
+   return VMCI_SUCCESS;
 }
 
+#if !defined(VMKERNEL)
 
 /*
  *-----------------------------------------------------------------------------
@@ -1871,6 +2032,7 @@ QPGuestEndpointDestroy(QPGuestEndpoint *entry) // IN
    ASSERT(entry->qp.refCount == 0);
 
    VMCI_FreePPNSet(&entry->ppnSet);
+   VMCI_CleanupQueueMutex(entry->produceQ, entry->consumeQ);
    VMCI_FreeQueue(entry->produceQ, entry->qp.produceSize);
    VMCI_FreeQueue(entry->consumeQ, entry->qp.consumeSize);
    VMCI_FreeKernelMem(entry, sizeof *entry);
@@ -1914,7 +2076,7 @@ VMCIQueuePairAllocHypercall(const QPGuestEndpoint *entry) // IN
    }
 
    allocMsg->hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
-                                       VMCI_QUEUEPAIR_ALLOC);
+                                        VMCI_QUEUEPAIR_ALLOC);
    allocMsg->hdr.src = VMCI_ANON_SRC_HANDLE;
    allocMsg->hdr.payloadSize = msgSize - VMCI_DG_HEADERSIZE;
    allocMsg->handle = entry->qp.handle;
index b2531dcda970254fe686f5d6035600731dc1f1bd..8b473572a278d4a1c238ca943b09a3bef00c3ca9 100644 (file)
 #include "includeCheck.h"
 
 #include "vmci_defs.h"
-#include "vmciContext.h"
 #include "vmci_iocontrols.h"
+#include "vmci_kernel_if.h"
+#include "vmciContext.h"
 #include "vmciQueue.h"
-#ifdef VMKERNEL
-#include "vm_atomic.h"
-#include "util_copy_dist.h"
-#include "shm.h"
 
 /*
- * On vmkernel, the queue pairs are either backed by shared memory or
- * kernel memory allocated on the VMCI heap. Shared memory is used for
- * guest to guest and guest to host queue pairs, whereas the heap
- * allocated queue pairs are used for host local queue pairs.
+ * QueuePairPageStore describes how the memory of a given queue pair
+ * is backed. When the queue pair is between the host and a guest, the
+ * page store consists of references to the guest pages. On vmkernel,
+ * this is a list of PPNs, and on hosted, it is a user VA where the
+ * queue pair is mapped into the VMX address space.
  */
 
 typedef struct QueuePairPageStore {
-   Bool shared; // Indicates whether the pages are stored in shared memory
-   union {
-      Shm_ID   shmID;
-      void    *ptr;
-   } store;
+   VMCIQPGuestMem pages;  // Reference to pages backing the queue pair.
+   uint32 len;            // Length of pageList/virtual address range (in pages).
 } QueuePairPageStore;
 
-#else
-
-typedef struct QueuePairPageStore {
-   Bool user;                  // Whether the page file strings are userspace pointers
-   VA64 producePageFile;       // Name of the file
-   VA64 consumePageFile;       // Name of the file
-   uint64 producePageFileSize; // Size of the string
-   uint64 consumePageFileSize; // Size of the string
-   VA64 producePageUVA;        // User space VA of the mapped file in VMX
-   VA64 consumePageUVA;        // User space VA of the mapped file in VMX
-} QueuePairPageStore;
-
-#endif // !VMKERNEL
-
-#if (defined(__linux__) || defined(_WIN32) || defined(__APPLE__) || \
-     defined(SOLARIS)) && !defined(VMKERNEL)
-struct VMCIQueue;
-
-typedef struct PageStoreAttachInfo {
-   char producePageFile[VMCI_PATH_MAX];
-   char consumePageFile[VMCI_PATH_MAX];
-   uint64 numProducePages;
-   uint64 numConsumePages;
-
-   /* User VAs in the VMX task */
-   VA64   produceBuffer;
-   VA64   consumeBuffer;
-
-   /*
-    * Platform-specific references to the physical pages backing the
-    * queue. These include a page for the header.
-    *
-    * PR361589 tracks this, too.
-    */
-
-#if defined(__linux__)
-   struct page **producePages;
-   struct page **consumePages;
-#elif defined(_WIN32)
-   void *kmallocPtr;
-   size_t kmallocSize;
-   PMDL produceMDL;
-   PMDL consumeMDL;
-#elif defined(__APPLE__)
-   /*
-    * All the Mac OS X fields are members of the VMCIQueue
-    */
-#endif
-} PageStoreAttachInfo;
-
-#endif // (__linux__ || _WIN32 || __APPLE__) && !VMKERNEL
-
 
 /*
  *------------------------------------------------------------------------------
@@ -128,13 +71,7 @@ typedef struct PageStoreAttachInfo {
 static INLINE Bool
 VMCI_QP_PAGESTORE_IS_WELLFORMED(QueuePairPageStore *pageStore) // IN
 {
-#ifdef VMKERNEL
-   return (pageStore->shared && pageStore->store.shmID != SHM_INVALID_ID) ||
-          (!pageStore->shared && pageStore->store.ptr != NULL);
-#else
-   return pageStore->producePageFile && pageStore->consumePageFile &&
-          pageStore->producePageFileSize && pageStore->consumePageFileSize;
-#endif // !VMKERNEL
+   return pageStore->len >= 2;
 }
 
 int VMCIQPBroker_Init(void);
@@ -146,10 +83,9 @@ int VMCIQPBroker_Alloc(VMCIHandle handle, VMCIId peer, uint32 flags,
                        uint64 produceSize, uint64 consumeSize,
                        QueuePairPageStore *pageStore,
                        VMCIContext *context);
-int VMCIQPBroker_SetPageStore(VMCIHandle handle,
-                              QueuePairPageStore *pageStore,
+int VMCIQPBroker_SetPageStore(VMCIHandle handle, VA64 produceUVA, VA64 consumeUVA,
                               VMCIContext *context);
-int VMCIQPBroker_Detach(VMCIHandle handle, VMCIContext *context, Bool detach);
+int VMCIQPBroker_Detach(VMCIHandle handle, VMCIContext *context);
 
 int VMCIQPGuestEndpoints_Init(void);
 void VMCIQPGuestEndpoints_Exit(void);
@@ -161,6 +97,8 @@ int VMCIQueuePair_Alloc(VMCIHandle *handle, VMCIQueue **produceQ,
                         uint64 consumeSize, VMCIId peer, uint32 flags,
                         VMCIPrivilegeFlags privFlags, Bool guestEndpoint);
 int VMCIQueuePair_Detach(VMCIHandle handle, Bool guestEndpoint);
+int VMCIQPBroker_Map(VMCIHandle  handle, VMCIContext *context, VMCIQPGuestMem guestMem);
+int VMCIQPBroker_Unmap(VMCIHandle  handle, VMCIContext *context, VMCIGuestMemID gid);
 
 
 #endif /* !_VMCI_QUEUE_PAIR_H_ */
index 4d8d7c29e50ffc9a78216e53710f72b68cc2f251..9be8233339504d01c306d160e80e8c1b54853341 100644 (file)
@@ -410,7 +410,7 @@ vmci_host_init(void)
    if (retval) {
       Warning(LGPFX"Module registration error "
               "(name=%s,major=%d,minor=%d,err=%d).\n",
-             linuxState.deviceName, -retval, linuxState.major,
+              linuxState.deviceName, -retval, linuxState.major,
               linuxState.minor);
       goto exit;
    }
@@ -524,7 +524,7 @@ LinuxDriver_Close(struct inode *inode, // IN
 
 static unsigned int
 LinuxDriverPoll(struct file *filp,
-               poll_table *wait)
+                poll_table *wait)
 {
    VMCILockFlags flags;
    VMCILinux *vmciLinux = (VMCILinux *) filp->private_data;
@@ -594,6 +594,62 @@ VMCICopyHandleArrayToUser(void *userBufUVA,             // IN
 }
 
 
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIDoQPBrokerAlloc --
+ *
+ *      Helper function for creating queue pair and copying the result
+ *      to user memory.
+ *
+ * Results:
+ *      0 if result value was copied to user memory, -EFAULT otherwise.
+ *
+ * Side effects:
+ *      None.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static int
+VMCIDoQPBrokerAlloc(VMCIHandle handle,
+                    VMCIId peer,
+                    uint32 flags,
+                    uint64 produceSize,
+                    uint64 consumeSize,
+                    QueuePairPageStore *pageStore,
+                    VMCIContext *context,
+                    Bool vmToVm,
+                    void *resultUVA)
+{
+   VMCIId cid;
+   int result;
+   int retval;
+
+   cid = VMCIContext_GetId(context);
+
+   VMCIQPBroker_Lock();
+   result = VMCIQPBroker_Alloc(handle, peer, flags, VMCI_NO_PRIVILEGE_FLAGS,
+                               produceSize, consumeSize, pageStore, context);
+   if (result == VMCI_SUCCESS && vmToVm) {
+      result = VMCI_SUCCESS_QUEUEPAIR_CREATE;
+   }
+   Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_ALLOC (cid=%u,result=%d).\n", cid, result);
+   retval = copy_to_user(resultUVA, &result, sizeof result);
+   if (retval) {
+      retval = -EFAULT;
+      if (result >= VMCI_SUCCESS) {
+         result = VMCIQPBroker_Detach(handle, context);
+         ASSERT(result >= VMCI_SUCCESS);
+      }
+   }
+
+   VMCIQPBroker_Unlock();
+
+   return retval;
+}
+
+
 /*
  *-----------------------------------------------------------------------------
  *
@@ -694,11 +750,11 @@ LinuxDriver_Ioctl(struct inode *inode,
       initBlock.cid = VMCIContext_GetId(vmciLinux->context);
       retval = copy_to_user((void *)ioarg, &initBlock, sizeof initBlock);
       if (retval != 0) {
-        VMCIContext_ReleaseContext(vmciLinux->context);
-        vmciLinux->context = NULL;
-        Log(LGPFX"Error writing init block.\n");
-        retval = -EFAULT;
-        goto init_release;
+         VMCIContext_ReleaseContext(vmciLinux->context);
+         vmciLinux->context = NULL;
+         Log(LGPFX"Error writing init block.\n");
+         retval = -EFAULT;
+         goto init_release;
       }
       ASSERT(initBlock.cid != VMCI_INVALID_ID);
 
@@ -731,14 +787,14 @@ LinuxDriver_Ioctl(struct inode *inode,
 
       if (sendInfo.len > VMCI_MAX_DG_SIZE) {
          Warning(LGPFX"Datagram too big (size=%d).\n", sendInfo.len);
-        retval = -EINVAL;
-        break;
+         retval = -EINVAL;
+         break;
       }
 
       if (sendInfo.len < sizeof *dg) {
          Warning(LGPFX"Datagram too small (size=%d).\n", sendInfo.len);
-        retval = -EINVAL;
-        break;
+         retval = -EINVAL;
+         break;
       }
 
       dg = VMCI_AllocKernelMem(sendInfo.len, VMCI_MEMORY_NORMAL);
@@ -799,70 +855,121 @@ LinuxDriver_Ioctl(struct inode *inode,
                                                     &size, &dg);
 
       if (recvInfo.result >= VMCI_SUCCESS) {
-        ASSERT(dg);
-        retval = copy_to_user((void *) ((uintptr_t) recvInfo.addr), dg,
-                              VMCI_DG_SIZE(dg));
-        VMCI_FreeKernelMem(dg, VMCI_DG_SIZE(dg));
-        if (retval != 0) {
-           break;
-        }
+         ASSERT(dg);
+         retval = copy_to_user((void *) ((uintptr_t) recvInfo.addr), dg,
+                               VMCI_DG_SIZE(dg));
+         VMCI_FreeKernelMem(dg, VMCI_DG_SIZE(dg));
+         if (retval != 0) {
+            break;
+         }
       }
       retval = copy_to_user((void *)ioarg, &recvInfo, sizeof recvInfo);
       break;
    }
 
    case IOCTL_VMCI_QUEUEPAIR_ALLOC: {
-      VMCIQueuePairAllocInfo queuePairAllocInfo;
-      VMCIQueuePairAllocInfo *info = (VMCIQueuePairAllocInfo *)ioarg;
+      if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_ALLOC only valid for contexts.\n");
+         retval = -EINVAL;
+         break;
+      }
+
+      if (vmciLinux->userVersion < VMCI_VERSION_NOVMVM) {
+         VMCIQueuePairAllocInfo_VMToVM queuePairAllocInfo;
+         VMCIQueuePairAllocInfo_VMToVM *info = (VMCIQueuePairAllocInfo_VMToVM *)ioarg;
+
+         retval = copy_from_user(&queuePairAllocInfo, (void *)ioarg,
+                                 sizeof queuePairAllocInfo);
+         if (retval) {
+            retval = -EFAULT;
+            break;
+         }
+
+         retval = VMCIDoQPBrokerAlloc(queuePairAllocInfo.handle,
+                                      queuePairAllocInfo.peer,
+                                      queuePairAllocInfo.flags,
+                                      queuePairAllocInfo.produceSize,
+                                      queuePairAllocInfo.consumeSize,
+                                      NULL,
+                                      vmciLinux->context,
+                                      TRUE, // VM to VM style create
+                                      &info->result);
+      } else {
+         VMCIQueuePairAllocInfo queuePairAllocInfo;
+         VMCIQueuePairAllocInfo *info = (VMCIQueuePairAllocInfo *)ioarg;
+         QueuePairPageStore pageStore;
+
+         retval = copy_from_user(&queuePairAllocInfo, (void *)ioarg,
+                                 sizeof queuePairAllocInfo);
+         if (retval) {
+            retval = -EFAULT;
+            break;
+         }
+
+         pageStore.pages = queuePairAllocInfo.ppnVA;
+         pageStore.len = queuePairAllocInfo.numPPNs;
+
+         retval = VMCIDoQPBrokerAlloc(queuePairAllocInfo.handle,
+                                      queuePairAllocInfo.peer,
+                                      queuePairAllocInfo.flags,
+                                      queuePairAllocInfo.produceSize,
+                                      queuePairAllocInfo.consumeSize,
+                                      &pageStore,
+                                      vmciLinux->context,
+                                      FALSE, // Not VM to VM style create
+                                      &info->result);
+      }
+      break;
+   }
+
+   case IOCTL_VMCI_QUEUEPAIR_SETVA: {
+      VMCIQueuePairSetVAInfo setVAInfo;
+      VMCIQueuePairSetVAInfo *info = (VMCIQueuePairSetVAInfo *)ioarg;
       int32 result;
-      VMCIId cid;
 
       if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
-         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_ALLOC only valid for contexts.\n");
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETVA only valid for contexts.\n");
          retval = -EINVAL;
          break;
       }
 
-      retval = copy_from_user(&queuePairAllocInfo, (void *)ioarg,
-                              sizeof queuePairAllocInfo);
+      if (vmciLinux->userVersion < VMCI_VERSION_NOVMVM) {
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETVA not supported for this VMX version.\n");
+         retval = -EINVAL;
+         break;
+      }
+
+      retval = copy_from_user(&setVAInfo, (void *)ioarg, sizeof setVAInfo);
       if (retval) {
          retval = -EFAULT;
          break;
       }
 
-      cid = VMCIContext_GetId(vmciLinux->context);
-      VMCIQPBroker_Lock();
+      if (setVAInfo.va) {
+         /*
+          * VMX is passing down a new VA for the queue pair mapping.
+          */
+
+         VMCIQPBroker_Lock();
+         result = VMCIQPBroker_Map(setVAInfo.handle, vmciLinux->context, setVAInfo.va);
+         VMCIQPBroker_Unlock();
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETVA - map (result=%d).\n", result);
+      } else {
+         /*
+          * The queue pair is about to be unmapped by the VMX.
+          */
+
+         VMCIQPBroker_Lock();
+         result = VMCIQPBroker_Unmap(setVAInfo.handle, vmciLinux->context, 0);
+         VMCIQPBroker_Unlock();
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETVA - unmap (result=%d).\n", result);
+      }
 
-      {
-        QueuePairPageStore pageStore = { TRUE,
-                                         queuePairAllocInfo.producePageFile,
-                                         queuePairAllocInfo.consumePageFile,
-                                         queuePairAllocInfo.producePageFileSize,
-                                         queuePairAllocInfo.consumePageFileSize,
-                                         0,
-                                         0 };
-
-        result = VMCIQPBroker_Alloc(queuePairAllocInfo.handle,
-                                     queuePairAllocInfo.peer,
-                                     queuePairAllocInfo.flags,
-                                     VMCI_NO_PRIVILEGE_FLAGS,
-                                     queuePairAllocInfo.produceSize,
-                                     queuePairAllocInfo.consumeSize,
-                                     &pageStore,
-                                     vmciLinux->context);
-      }
-      Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_ALLOC (cid=%u,result=%d).\n", cid, result);
       retval = copy_to_user(&info->result, &result, sizeof result);
       if (retval) {
          retval = -EFAULT;
-         if (result >= VMCI_SUCCESS) {
-            result = VMCIQPBroker_Detach(queuePairAllocInfo.handle,
-                                         vmciLinux->context, TRUE);
-            ASSERT(result >= VMCI_SUCCESS);
-         }
       }
 
-      VMCIQPBroker_Unlock();
       break;
    }
 
@@ -871,29 +978,22 @@ LinuxDriver_Ioctl(struct inode *inode,
       VMCIQueuePairPageFileInfo *info = (VMCIQueuePairPageFileInfo *)ioarg;
       int32 result;
       VMCIId cid;
-      Bool useUVA;
-      int size;
 
-      if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
-         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE only valid for contexts.\n");
+      if (vmciLinux->userVersion < VMCI_VERSION_HOSTQP ||
+          vmciLinux->userVersion >= VMCI_VERSION_NOVMVM) {
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE not supported for this VMX "
+             "(version=%d).\n", vmciLinux->userVersion);
          retval = -EINVAL;
          break;
       }
 
-      if (VMCIContext_SupportsHostQP(vmciLinux->context)) {
-         useUVA = TRUE;
-         size = sizeof *info;
-      } else {
-         /*
-          * An older VMX version won't supply the UVA of the page
-          * files backing the queue pair contents (and headers)
-          */
-
-         useUVA = FALSE;
-         size = sizeof(VMCIQueuePairPageFileInfo_NoHostQP);
+      if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE only valid for contexts.\n");
+         retval = -EINVAL;
+         break;
       }
 
-      retval = copy_from_user(&pageFileInfo, (void *)ioarg, size);
+      retval = copy_from_user(&pageFileInfo, (void *)ioarg, sizeof *info);
       if (retval) {
          retval = -EFAULT;
          break;
@@ -919,27 +1019,19 @@ LinuxDriver_Ioctl(struct inode *inode,
       result = VMCI_SUCCESS;
       retval = copy_to_user(&info->result, &result, sizeof result);
       if (retval == 0) {
-         cid = VMCIContext_GetId(vmciLinux->context);
          VMCIQPBroker_Lock();
 
-         {
-            QueuePairPageStore pageStore = { TRUE,
-                                             pageFileInfo.producePageFile,
-                                             pageFileInfo.consumePageFile,
-                                             pageFileInfo.producePageFileSize,
-                                             pageFileInfo.consumePageFileSize,
-                                             useUVA ? pageFileInfo.produceVA : 0,
-                                             useUVA ? pageFileInfo.consumeVA : 0 };
-
-            result = VMCIQPBroker_SetPageStore(pageFileInfo.handle,
-                                               &pageStore,
-                                               vmciLinux->context);
-         }
+         result = VMCIQPBroker_SetPageStore(pageFileInfo.handle,
+                                            pageFileInfo.produceVA,
+                                            pageFileInfo.consumeVA,
+                                            vmciLinux->context);
+
          VMCIQPBroker_Unlock();
+         cid = VMCIContext_GetId(vmciLinux->context);
+         Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE (cid=%u,result=%d).\n",
+             cid, result);
 
          if (result < VMCI_SUCCESS) {
-            Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE (cid=%u,result=%d).\n",
-                cid, result);
 
             retval = copy_to_user(&info->result, &result, sizeof result);
             if (retval != 0) {
@@ -994,32 +1086,21 @@ LinuxDriver_Ioctl(struct inode *inode,
 
       cid = VMCIContext_GetId(vmciLinux->context);
       VMCIQPBroker_Lock();
-      result = VMCIQPBroker_Detach(detachInfo.handle, vmciLinux->context,
-                                FALSE); /* Probe detach operation. */
+      result = VMCIQPBroker_Detach(detachInfo.handle, vmciLinux->context);
       Log(LGPFX"IOCTL_VMCI_QUEUEPAIR_DETACH (cid=%u,result=%d).\n",
           cid, result);
+      VMCIQPBroker_Unlock();
+
+      if (result == VMCI_SUCCESS &&
+          vmciLinux->userVersion < VMCI_VERSION_NOVMVM) {
+         result = VMCI_SUCCESS_LAST_DETACH;
+      }
 
       retval = copy_to_user(&info->result, &result, sizeof result);
       if (retval) {
-         /* Could not copy to userland, don't perform the actual detach. */
          retval = -EFAULT;
-      } else {
-         if (result >= VMCI_SUCCESS) {
-            /* Now perform the actual detach. */
-            int32 result2 = VMCIQPBroker_Detach(detachInfo.handle,
-                                             vmciLinux->context, TRUE);
-            if (UNLIKELY(result != result2)) {
-               /*
-                * This should never happen.  But it's better to log a warning
-                * than to crash the host.
-                */
-               Warning(LGPFX"VMCIQPBroker_Detach returned different results:  "
-                       "(previous=%d,current=%d).\n", result, result2);
-            }
-         }
       }
 
-      VMCIQPBroker_Unlock();
       break;
    }
 
@@ -1043,7 +1124,7 @@ LinuxDriver_Ioctl(struct inode *inode,
 
       cid = VMCIContext_GetId(vmciLinux->context);
       result = VMCIDatagramRequestWellKnownMap(mapInfo.wellKnownID, cid,
-                                              VMCIContext_GetPrivFlags(cid));
+                                               VMCIContext_GetPrivFlags(cid));
       retval = copy_to_user(&info->result, &result, sizeof result);
       if (retval) {
          retval = -EFAULT;
@@ -1116,7 +1197,7 @@ LinuxDriver_Ioctl(struct inode *inode,
 
       if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
          Log(LGPFX"IOCTL_VMCI_CTX_REMOVE_NOTIFICATION only valid for "
-            "contexts.\n");
+             "contexts.\n");
          retval = -EINVAL;
          break;
       }
@@ -1156,16 +1237,16 @@ LinuxDriver_Ioctl(struct inode *inode,
 
       cid = VMCIContext_GetId(vmciLinux->context);
       getInfo.result = VMCIContext_GetCheckpointState(cid, getInfo.cptType,
-                                                     &getInfo.bufSize,
-                                                     &cptBuf);
+                                                      &getInfo.bufSize,
+                                                      &cptBuf);
       if (getInfo.result == VMCI_SUCCESS && getInfo.bufSize) {
-        retval = copy_to_user((void *)(VA)getInfo.cptBuf, cptBuf,
-                              getInfo.bufSize);
-        VMCI_FreeKernelMem(cptBuf, getInfo.bufSize);
-        if (retval) {
-           retval = -EFAULT;
-           break;
-        }
+         retval = copy_to_user((void *)(VA)getInfo.cptBuf, cptBuf,
+                               getInfo.bufSize);
+         VMCI_FreeKernelMem(cptBuf, getInfo.bufSize);
+         if (retval) {
+            retval = -EFAULT;
+            break;
+         }
       }
       retval = copy_to_user((void *)ioarg, &getInfo, sizeof getInfo);
       if (retval) {
@@ -1195,21 +1276,21 @@ LinuxDriver_Ioctl(struct inode *inode,
       cptBuf = VMCI_AllocKernelMem(setInfo.bufSize, VMCI_MEMORY_NORMAL);
       if (cptBuf == NULL) {
          Log(LGPFX"Cannot allocate memory to set cpt state (type=%d).\n",
-            setInfo.cptType);
+             setInfo.cptType);
          retval = -ENOMEM;
          break;
       }
       retval = copy_from_user(cptBuf, (void *)(VA)setInfo.cptBuf,
-                             setInfo.bufSize);
+                              setInfo.bufSize);
       if (retval) {
-        VMCI_FreeKernelMem(cptBuf, setInfo.bufSize);
+         VMCI_FreeKernelMem(cptBuf, setInfo.bufSize);
          retval = -EFAULT;
          break;
       }
 
       cid = VMCIContext_GetId(vmciLinux->context);
       setInfo.result = VMCIContext_SetCheckpointState(cid, setInfo.cptType,
-                                                     setInfo.bufSize, cptBuf);
+                                                      setInfo.bufSize, cptBuf);
       VMCI_FreeKernelMem(cptBuf, setInfo.bufSize);
       retval = copy_to_user((void *)ioarg, &setInfo, sizeof setInfo);
       if (retval) {
@@ -2254,7 +2335,7 @@ dispatch_datagrams(unsigned long data)
 
    if (dev == NULL) {
       printk(KERN_DEBUG "vmci: dispatch_datagrams(): no vmci device"
-            "present.\n");
+             " present.\n");
       return;
    }
 
@@ -2265,7 +2346,7 @@ dispatch_datagrams(unsigned long data)
 
 
    VMCI_ReadDatagramsFromPort((VMCIIoHandle) 0, dev->ioaddr + VMCI_DATA_IN_ADDR,
-                             data_buffer, data_buffer_size);
+                              data_buffer, data_buffer_size);
 }
 
 
@@ -2293,7 +2374,7 @@ process_bitmap(unsigned long data)
 
    if (dev == NULL) {
       printk(KERN_DEBUG "vmci: process_bitmaps(): no vmci device"
-            "present.\n");
+             " present.\n");
       return;
    }
 
@@ -2506,7 +2587,7 @@ MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
 
 module_param_named(disable_msix, vmci_disable_msix, bool, 0);
 MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default="
-                __stringify(VMCI_DISABLE_MSIX) ")");
+                 __stringify(VMCI_DISABLE_MSIX) ")");
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface (VMCI).");
index 8fb5f4b2294db861a23097b0c20cb6cb2ee8a233..3344a4002c9a7adbfaf1ea54865d0e3e965c9243 100644 (file)
 
 struct VMCIQueueKernelIf {
    struct page **page;
+   struct page **headerPage;
+   VMCIMutex __mutex;
+   VMCIMutex *mutex;
+   Bool host;
+   size_t numPages;
 };
 
 typedef struct VMCIDelayedWorkInfo {
@@ -706,8 +711,8 @@ VMCI_SignalEvent(VMCIEvent *event)  // IN:
 
 void
 VMCI_WaitOnEvent(VMCIEvent *event,              // IN:
-                VMCIEventReleaseCB releaseCB,  // IN:
-                void *clientData)              // IN:
+                 VMCIEventReleaseCB releaseCB,  // IN:
+                 void *clientData)              // IN:
 {
    /*
     * XXX Should this be a TASK_UNINTERRUPTIBLE wait? I'm leaving it
@@ -910,8 +915,10 @@ VMCI_AllocQueue(uint64 size) // IN: size of queue (not including header)
    queue = (VMCIQueue *)((uint8 *)qHeader + PAGE_SIZE);
    queue->qHeader = qHeader;
    queue->kernelIf = (VMCIQueueKernelIf *)((uint8 *)queue + sizeof *queue);
+   queue->kernelIf->headerPage = NULL; // Unused in guest.
    queue->kernelIf->page = (struct page **)((uint8 *)queue->kernelIf +
                                             sizeof *(queue->kernelIf));
+   queue->kernelIf->host = FALSE;
 
    for (i = 0; i < numDataPages; i++) {
       queue->kernelIf->page[i] = alloc_pages(GFP_KERNEL, 0);
@@ -1317,6 +1324,65 @@ VMCIMemcpyFromQueue(void *dest,             // OUT:
 }
 
 
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIMemcpyToQueueLocal --
+ *
+ *      Copies from a given buffer to a local VMCI queue. On Linux, this is the
+ *      same as a regular copy.
+ *
+ * Results:
+ *      Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ *      None.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+VMCIMemcpyToQueueLocal(VMCIQueue *queue,         // OUT
+                       uint64 queueOffset,       // IN
+                       const void *src,          // IN
+                       size_t srcOffset,         // IN
+                       size_t size,              // IN
+                       int bufType)  // IN
+{
+   return __VMCIMemcpyToQueue(queue, queueOffset,
+                              (uint8 *)src + srcOffset, size, FALSE);
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIMemcpyFromQueueLocal --
+ *
+ *      Copies to a given buffer from a VMCI Queue.
+ *
+ * Results:
+ *      Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ *      None.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+VMCIMemcpyFromQueueLocal(void *dest,             // OUT:
+                         size_t destOffset,      // IN:
+                         const VMCIQueue *queue, // IN:
+                         uint64 queueOffset,     // IN:
+                         size_t size,            // IN:
+                         int bufType)            // IN: Unused
+{
+   return __VMCIMemcpyFromQueue((uint8 *)dest + destOffset,
+                                queue, queueOffset, size, FALSE);
+}
+
+
 /*
  *----------------------------------------------------------------------------
  *
@@ -1440,12 +1506,21 @@ VMCIQueue *
 VMCIHost_AllocQueue(uint64 size) // IN:
 {
    VMCIQueue *queue;
-   const uint queueSize = sizeof *queue + sizeof *(queue->kernelIf);
+   const size_t numPages = CEILING(size, PAGE_SIZE) + 1;
+   const size_t queueSize = sizeof *queue + sizeof *(queue->kernelIf);
+   const size_t queuePageSize = numPages * sizeof *queue->kernelIf->page;
 
-   queue = VMCI_AllocKernelMem(queueSize, VMCI_MEMORY_NORMAL);
+   queue = VMCI_AllocKernelMem(queueSize + queuePageSize, VMCI_MEMORY_NORMAL);
    if (queue) {
       queue->qHeader = NULL;
       queue->kernelIf = (VMCIQueueKernelIf *)((uint8 *)queue + sizeof *queue);
+      queue->kernelIf->host = TRUE;
+      queue->kernelIf->mutex = NULL;
+      queue->kernelIf->numPages = numPages;
+      queue->kernelIf->headerPage = (struct page **)((uint8*)queue + queueSize);
+      queue->kernelIf->page = &queue->kernelIf->headerPage[1];
+      memset(queue->kernelIf->headerPage, 0,
+             sizeof *queue->kernelIf->headerPage * queue->kernelIf->numPages);
    }
 
    return queue;
@@ -1480,6 +1555,142 @@ VMCIHost_FreeQueue(VMCIQueue *queue,   // IN:
 }
 
 
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCI_InitQueueMutex()
+ *
+ *       Initialize the mutex for the pair of queues.  This mutex is used to
+ *       protect the qHeader and the buffer from changing out from under any
+ *       users of either queue.  Of course, it's only any good if the mutexes
+ *       are actually acquired.  Queue structure must lie on non-paged memory
+ *       or we cannot guarantee access to the mutex.
+ *
+ * Results:
+ *       None.
+ *
+ * Side Effects:
+ *       None.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+void
+VMCI_InitQueueMutex(VMCIQueue *produceQ, // IN/OUT
+                    VMCIQueue *consumeQ) // IN/OUT
+{
+   ASSERT(produceQ);
+   ASSERT(consumeQ);
+   ASSERT(produceQ->kernelIf);
+   ASSERT(consumeQ->kernelIf);
+
+   /*
+    * Only the host queue has shared state - the guest queues do not
+    * need to synchronize access using a queue mutex.
+    */
+
+   if (produceQ->kernelIf->host) {
+      produceQ->kernelIf->mutex = &produceQ->kernelIf->__mutex;
+      consumeQ->kernelIf->mutex = &produceQ->kernelIf->__mutex;
+      sema_init(produceQ->kernelIf->mutex, 1);
+   }
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCI_CleanupQueueMutex()
+ *
+ *       Cleans up the mutex for the pair of queues.
+ *
+ * Results:
+ *       None.
+ *
+ * Side Effects:
+ *       None.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+void
+VMCI_CleanupQueueMutex(VMCIQueue *produceQ, // IN/OUT
+                       VMCIQueue *consumeQ) // IN/OUT
+{
+   ASSERT(produceQ);
+   ASSERT(consumeQ);
+   ASSERT(produceQ->kernelIf);
+   ASSERT(consumeQ->kernelIf);
+
+   if (produceQ->kernelIf->host) {
+      produceQ->kernelIf->mutex = NULL;
+      consumeQ->kernelIf->mutex = NULL;
+   }
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCI_AcquireQueueMutex()
+ *
+ *       Acquire the mutex for the queue.  Note that the produceQ and
+ *       the consumeQ share a mutex.  So, only one of the two need to
+ *       be passed in to this routine.  Either will work just fine.
+ *
+ * Results:
+ *       None.
+ *
+ * Side Effects:
+ *       May block the caller.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+void
+VMCI_AcquireQueueMutex(VMCIQueue *queue) // IN
+{
+   ASSERT(queue);
+   ASSERT(queue->kernelIf);
+
+   if (queue->kernelIf->host) {
+      ASSERT(queue->kernelIf->mutex);
+      down(queue->kernelIf->mutex);
+   }
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCI_ReleaseQueueMutex()
+ *
+ *       Release the mutex for the queue.  Note that the produceQ and
+ *       the consumeQ share a mutex.  So, only one of the two need to
+ *       be passed in to this routine.  Either will work just fine.
+ *
+ * Results:
+ *       None.
+ *
+ * Side Effects:
+ *       May block the caller.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+void
+VMCI_ReleaseQueueMutex(VMCIQueue *queue) // IN
+{
+   ASSERT(queue);
+   ASSERT(queue->kernelIf);
+
+   if (queue->kernelIf->host) {
+      ASSERT(queue->kernelIf->mutex);
+      up(queue->kernelIf->mutex);
+   }
+}
+
+
 /*
  *-----------------------------------------------------------------------------
  *
@@ -1514,10 +1725,184 @@ VMCIReleasePages(struct page **pages,  // IN
 }
 
 
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIHost_RegisterUserMemory --
+ *
+ *       Registers the specification of the user pages used for backing a queue
+ *       pair. Enough information to map in pages is stored in the OS specific
+ *       part of the VMCIQueue structure.
+ *
+ * Results:
+ *       VMCI_SUCCESS on success, negative error code on failure.
+ *
+ * Side Effects:
+ *       None.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+VMCIHost_RegisterUserMemory(QueuePairPageStore *pageStore,  // IN
+                            VMCIQueue *produceQ,            // OUT
+                            VMCIQueue *consumeQ)            // OUT
+{
+   VA64 produceUVA;
+   VA64 consumeUVA;
+
+   ASSERT(produceQ->kernelIf->headerPage && consumeQ->kernelIf->headerPage);
+
+   /*
+    * The new style and the old style mapping only differs in that we either
+    * get a single or two UVAs, so we split the single UVA range at the
+    * appropriate spot.
+    */
+
+   produceUVA = pageStore->pages;
+   consumeUVA = pageStore->pages + produceQ->kernelIf->numPages * PAGE_SIZE;
+   return VMCIHost_GetUserMemory(produceUVA, consumeUVA, produceQ, consumeQ);
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIHost_UnregisterUserMemory --
+ *
+ *       Releases and removes the references to user pages stored in the attach
+ *       struct.
+ *
+ * Results:
+ *       None
+ *
+ * Side Effects:
+ *       Pages are released from the page cache and may become
+ *       swappable again.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+void
+VMCIHost_UnregisterUserMemory(VMCIQueue *produceQ,         // IN/OUT
+                              VMCIQueue *consumeQ)         // IN/OUT
+{
+   ASSERT(produceQ->kernelIf);
+   ASSERT(consumeQ->kernelIf);
+   ASSERT(!produceQ->qHeader && !consumeQ->qHeader);
+
+   VMCIReleasePages(produceQ->kernelIf->headerPage, produceQ->kernelIf->numPages, TRUE);
+   memset(produceQ->kernelIf->headerPage, 0,
+          sizeof *produceQ->kernelIf->headerPage * produceQ->kernelIf->numPages);
+   VMCIReleasePages(consumeQ->kernelIf->headerPage, consumeQ->kernelIf->numPages, TRUE);
+   memset(consumeQ->kernelIf->headerPage, 0,
+          sizeof *consumeQ->kernelIf->headerPage * consumeQ->kernelIf->numPages);
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIHost_MapQueueHeaders --
+ *
+ *       Once VMCIHost_RegisterUserMemory has been performed on a
+ *       queue, the queue pair headers can be mapped into the
+ *       kernel. Once mapped, they must be unmapped with
+ *       VMCIHost_UnmapQueueHeaders prior to calling
+ *       VMCIHost_UnregisterUserMemory.
+ *
+ * Results:
+ *       VMCI_SUCCESS if pages are mapped, appropriate error code otherwise.
+ *
+ * Side Effects:
+ *       Pages are pinned.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+VMCIHost_MapQueueHeaders(VMCIQueue *produceQ,  // IN/OUT
+                         VMCIQueue *consumeQ)  // IN/OUT
+{
+   int result;
+
+   if (!produceQ->qHeader || !consumeQ->qHeader) {
+      struct page *headers[2];
+
+      if (produceQ->qHeader != consumeQ->qHeader) {
+         return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+      }
+
+      if (produceQ->kernelIf->headerPage == NULL ||
+          *produceQ->kernelIf->headerPage == NULL) {
+         return VMCI_ERROR_UNAVAILABLE;
+      }
+
+      ASSERT(*produceQ->kernelIf->headerPage && *consumeQ->kernelIf->headerPage);
+
+      headers[0] = *produceQ->kernelIf->headerPage;
+      headers[1] = *consumeQ->kernelIf->headerPage;
+
+      produceQ->qHeader = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
+      if (produceQ->qHeader != NULL) {
+         consumeQ->qHeader =
+            (VMCIQueueHeader *)((uint8 *)produceQ->qHeader + PAGE_SIZE);
+         result = VMCI_SUCCESS;
+      } else {
+         Log("vmap failed\n");
+         result = VMCI_ERROR_NO_MEM;
+     }
+   } else {
+      result = VMCI_SUCCESS;
+   }
+
+   return result;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * VMCIHost_UnmapQueueHeaders --
+ *
+ *       Unmaps previously mapped queue pair headers from the kernel.
+ *
+ * Results:
+ *       VMCI_SUCCESS always.
+ *
+ * Side Effects:
+ *       Pages are unpinned.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+VMCIHost_UnmapQueueHeaders(VMCIGuestMemID gid,   // IN
+                           VMCIQueue *produceQ,  // IN/OUT
+                           VMCIQueue *consumeQ)  // IN/OUT
+{
+   if (produceQ->qHeader) {
+      ASSERT(consumeQ->qHeader);
+
+      if (produceQ->qHeader < consumeQ->qHeader) {
+         vunmap(produceQ->qHeader);
+      } else {
+         vunmap(consumeQ->qHeader);
+      }
+      produceQ->qHeader = NULL;
+      consumeQ->qHeader = NULL;
+   }
+
+   return VMCI_SUCCESS;
+}
+
+
 /*
  *-----------------------------------------------------------------------------
  *
  * VMCIHost_GetUserMemory --
+ *
  *       Lock the user pages referenced by the {produce,consume}Buffer
  *       struct into memory and populate the {produce,consume}Pages
  *       arrays in the attach structure with them.
@@ -1532,94 +1917,47 @@ VMCIReleasePages(struct page **pages,  // IN
  */
 
 int
-VMCIHost_GetUserMemory(PageStoreAttachInfo *attach,      // IN/OUT
-                       VMCIQueue *produceQ,              // OUT
-                       VMCIQueue *consumeQ)              // OUT
+VMCIHost_GetUserMemory(VA64 produceUVA,       // IN
+                       VA64 consumeUVA,       // IN
+                       VMCIQueue *produceQ,   // OUT
+                       VMCIQueue *consumeQ)   // OUT
 {
    int retval;
    int err = VMCI_SUCCESS;
 
-
-   attach->producePages =
-      VMCI_AllocKernelMem(attach->numProducePages * sizeof attach->producePages[0],
-                          VMCI_MEMORY_NORMAL);
-   if (attach->producePages == NULL) {
-      return VMCI_ERROR_NO_MEM;
-   }
-   attach->consumePages =
-      VMCI_AllocKernelMem(attach->numConsumePages * sizeof attach->consumePages[0],
-                          VMCI_MEMORY_NORMAL);
-   if (attach->consumePages == NULL) {
-      err = VMCI_ERROR_NO_MEM;
-      goto errorDealloc;
-   }
-
    down_write(&current->mm->mmap_sem);
    retval = get_user_pages(current,
                            current->mm,
-                           (VA)attach->produceBuffer,
-                           attach->numProducePages,
+                           (VA)produceUVA,
+                           produceQ->kernelIf->numPages,
                            1, 0,
-                           attach->producePages,
+                           produceQ->kernelIf->headerPage,
                            NULL);
-   if (retval < attach->numProducePages) {
-      Log("get_user_pages(produce) failed: %d\n", retval);
-      VMCIReleasePages(attach->producePages, retval, FALSE);
+   if (retval < produceQ->kernelIf->numPages) {
+      Log("get_user_pages(produce) failed (retval=%d)\n", retval);
+      VMCIReleasePages(produceQ->kernelIf->headerPage, retval, FALSE);
       err = VMCI_ERROR_NO_MEM;
       goto out;
    }
 
    retval = get_user_pages(current,
                            current->mm,
-                           (VA)attach->consumeBuffer,
-                           attach->numConsumePages,
+                           (VA)consumeUVA,
+                           consumeQ->kernelIf->numPages,
                            1, 0,
-                           attach->consumePages,
+                           consumeQ->kernelIf->headerPage,
                            NULL);
-   if (retval < attach->numConsumePages) {
-      Log("get_user_pages(consume) failed: %d\n", retval);
-      VMCIReleasePages(attach->consumePages, retval, FALSE);
-      VMCIReleasePages(attach->producePages, attach->numProducePages, FALSE);
+   if (retval < consumeQ->kernelIf->numPages) {
+      Log("get_user_pages(consume) failed (retval=%d)\n", retval);
+      VMCIReleasePages(consumeQ->kernelIf->headerPage, retval, FALSE);
+      VMCIReleasePages(produceQ->kernelIf->headerPage,
+                       produceQ->kernelIf->numPages, FALSE);
       err = VMCI_ERROR_NO_MEM;
    }
 
 out:
    up_write(&current->mm->mmap_sem);
 
-   if (err == VMCI_SUCCESS) {
-      struct page *headers[2];
-
-      headers[0] = attach->producePages[0];
-      headers[1] = attach->consumePages[0];
-
-      produceQ->qHeader = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
-      if (produceQ->qHeader != NULL) {
-         consumeQ->qHeader =
-            (VMCIQueueHeader *)((uint8 *)produceQ->qHeader + PAGE_SIZE);
-         produceQ->kernelIf->page = &attach->producePages[1];
-         consumeQ->kernelIf->page = &attach->consumePages[1];
-      } else {
-         Log("vmap failed\n");
-         VMCIReleasePages(attach->producePages, attach->numProducePages, FALSE);
-         VMCIReleasePages(attach->consumePages, attach->numConsumePages, FALSE);
-         err = VMCI_ERROR_NO_MEM;
-     }
-   }
-
-errorDealloc:
-   if (err < VMCI_SUCCESS) {
-      if (attach->producePages != NULL) {
-         VMCI_FreeKernelMem(attach->producePages,
-                            attach->numProducePages *
-                            sizeof attach->producePages[0]);
-      }
-      if (attach->consumePages != NULL) {
-         VMCI_FreeKernelMem(attach->consumePages,
-                            attach->numConsumePages *
-                            sizeof attach->consumePages[0]);
-      }
-   }
-
    return err;
 }
 
@@ -1642,25 +1980,12 @@ errorDealloc:
  */
 
 void
-VMCIHost_ReleaseUserMemory(PageStoreAttachInfo *attach,      // IN/OUT
-                           VMCIQueue *produceQ,              // OUT
-                           VMCIQueue *consumeQ)              // OUT
+VMCIHost_ReleaseUserMemory(VMCIQueue *produceQ,  // IN/OUT
+                           VMCIQueue *consumeQ)  // IN/OUT
 {
-   ASSERT(attach->producePages);
-   ASSERT(attach->consumePages);
-   ASSERT(produceQ->qHeader);
-
-   vunmap(produceQ->qHeader);
-
-   VMCIReleasePages(attach->producePages, attach->numProducePages, TRUE);
-   VMCIReleasePages(attach->consumePages, attach->numConsumePages, TRUE);
+   ASSERT(produceQ->kernelIf->headerPage);
 
-   VMCI_FreeKernelMem(attach->producePages,
-                      attach->numProducePages *
-                      sizeof attach->producePages[0]);
-   VMCI_FreeKernelMem(attach->consumePages,
-                      attach->numConsumePages *
-                      sizeof attach->consumePages[0]);
+   VMCIHost_UnregisterUserMemory(produceQ, consumeQ);
 }
 
 
@@ -1682,9 +2007,9 @@ VMCIHost_ReleaseUserMemory(PageStoreAttachInfo *attach,      // IN/OUT
 
 void
 VMCI_ReadPortBytes(VMCIIoHandle handle,  // IN: Unused
-                  VMCIIoPort port,      // IN
-                  uint8 *buffer,        // OUT
-                  size_t bufferLength)  // IN
+                   VMCIIoPort port,      // IN
+                   uint8 *buffer,        // OUT
+                   size_t bufferLength)  // IN
 {
    insb(port, buffer, bufferLength);
 }
index 7337ae46b4fd95bd251a6b9534ea0321633724b4..e1939f29db0feae81ddecdb5335b4833bed190e2 100644 (file)
@@ -25,8 +25,8 @@
 #ifndef _VMCI_VERSION_H_
 #define _VMCI_VERSION_H_
 
-#define VMCI_DRIVER_VERSION          9.3.3.0
-#define VMCI_DRIVER_VERSION_COMMAS   9,3,3,0
-#define VMCI_DRIVER_VERSION_STRING   "9.3.3.0"
+#define VMCI_DRIVER_VERSION          9.3.4.0
+#define VMCI_DRIVER_VERSION_COMMAS   9,3,4,0
+#define VMCI_DRIVER_VERSION_STRING   "9.3.4.0"
 
 #endif /* _VMCI_VERSION_H_ */
index b75dd06e59bdb95719353e6fa2fff91cf22d1b82..81a5ffd9fba95458e7c43e08c247eb3d813070ff 100644 (file)
@@ -138,6 +138,11 @@ int VMCIMemcpyToQueue(VMCIQueue *queue, uint64 queueOffset, const void *src,
 int VMCIMemcpyFromQueue(void *dest, size_t destOffset, const VMCIQueue *queue,
                         uint64 queueOffset, size_t size, BUF_TYPE bufType);
 
+int VMCIMemcpyToQueueLocal(VMCIQueue *queue, uint64 queueOffset, const void *src,
+                           size_t srcOffset, size_t size, BUF_TYPE bufType);
+int VMCIMemcpyFromQueueLocal(void *dest, size_t destOffset, const VMCIQueue *queue,
+                             uint64 queueOffset, size_t size, BUF_TYPE bufType);
+
 #if defined VMKERNEL || defined (SOLARIS)         || \
    (defined(__APPLE__) && !defined (VMX86_TOOLS)) || \
    (defined(__linux__) && defined(__KERNEL__))    || \