]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Logical Tape Set: use min heap for freelist.
authorJeff Davis <jdavis@postgresql.org>
Thu, 6 Feb 2020 18:06:25 +0000 (10:06 -0800)
committerJeff Davis <jdavis@postgresql.org>
Thu, 6 Feb 2020 18:09:45 +0000 (10:09 -0800)
Previously, the freelist of blocks was tracked as an
occasionally-sorted array. A min heap is more resilient to larger
freelists or more frequent changes between reading and writing.

Discussion: https://postgr.es/m/97c46a59c27f3c38e486ca170fcbc618d97ab049.camel%40j-davis.com

src/backend/utils/sort/logtape.c

index 42cfb1f9f9802880905fa547ba3891311bbdc5ae..33495f8b4b33922a538a4bc59fa2a8ba71f18837 100644 (file)
  * when reading, and read multiple blocks from the same tape in one go,
  * whenever the buffer becomes empty.
  *
- * To support the above policy of writing to the lowest free block,
- * ltsGetFreeBlock sorts the list of free block numbers into decreasing
- * order each time it is asked for a block and the list isn't currently
- * sorted.  This is an efficient way to handle it because we expect cycles
- * of releasing many blocks followed by re-using many blocks, due to
- * the larger read buffer.
+ * To support the above policy of writing to the lowest free block, the
+ * freelist is a min heap.
  *
  * Since all the bookkeeping and buffer memory is allocated with palloc(),
  * and the underlying file(s) are made with OpenTemporaryFile, all resources
@@ -170,7 +166,7 @@ struct LogicalTapeSet
        /*
         * File size tracking.  nBlocksWritten is the size of the underlying file,
         * in BLCKSZ blocks.  nBlocksAllocated is the number of blocks allocated
-        * by ltsGetFreeBlock(), and it is always greater than or equal to
+        * by ltsReleaseBlock(), and it is always greater than or equal to
         * nBlocksWritten.  Blocks between nBlocksAllocated and nBlocksWritten are
         * blocks that have been allocated for a tape, but have not been written
         * to the underlying file yet.  nHoleBlocks tracks the total number of
@@ -188,17 +184,11 @@ struct LogicalTapeSet
         * If forgetFreeSpace is true then any freed blocks are simply forgotten
         * rather than being remembered in freeBlocks[].  See notes for
         * LogicalTapeSetForgetFreeSpace().
-        *
-        * If blocksSorted is true then the block numbers in freeBlocks are in
-        * *decreasing* order, so that removing the last entry gives us the lowest
-        * free block.  We re-sort the blocks whenever a block is demanded; this
-        * should be reasonably efficient given the expected usage pattern.
         */
        bool            forgetFreeSpace;        /* are we remembering free blocks? */
-       bool            blocksSorted;   /* is freeBlocks[] currently in order? */
-       long       *freeBlocks;         /* resizable array */
-       int                     nFreeBlocks;    /* # of currently free blocks */
-       int                     freeBlocksLen;  /* current allocated length of freeBlocks[] */
+       long       *freeBlocks;         /* resizable array holding minheap */
+       long            nFreeBlocks;    /* # of currently free blocks */
+       Size            freeBlocksLen;  /* current allocated length of freeBlocks[] */
 
        /* The array of logical tapes. */
        int                     nTapes;                 /* # of logical tapes in set */
@@ -321,46 +311,88 @@ ltsReadFillBuffer(LogicalTapeSet *lts, LogicalTape *lt)
        return (lt->nbytes > 0);
 }
 
-/*
- * qsort comparator for sorting freeBlocks[] into decreasing order.
- */
-static int
-freeBlocks_cmp(const void *a, const void *b)
+static inline void
+swap_nodes(long *heap, unsigned long a, unsigned long b)
+{
+       unsigned long swap;
+
+       swap = heap[a];
+       heap[a] = heap[b];
+       heap[b] = swap;
+}
+
+static inline unsigned long
+left_offset(unsigned long i)
+{
+       return 2 * i + 1;
+}
+
+static inline unsigned long
+right_offset(unsigned i)
+{
+       return 2 * i + 2;
+}
+
+static inline unsigned long
+parent_offset(unsigned long i)
 {
-       long            ablk = *((const long *) a);
-       long            bblk = *((const long *) b);
-
-       /* can't just subtract because long might be wider than int */
-       if (ablk < bblk)
-               return 1;
-       if (ablk > bblk)
-               return -1;
-       return 0;
+       return (i - 1) / 2;
 }
 
 /*
- * Select a currently unused block for writing to.
+ * Select the lowest currently unused block by taking the first element from
+ * the freelist min heap.
  */
 static long
 ltsGetFreeBlock(LogicalTapeSet *lts)
 {
-       /*
-        * If there are multiple free blocks, we select the one appearing last in
-        * freeBlocks[] (after sorting the array if needed).  If there are none,
-        * assign the next block at the end of the file.
-        */
-       if (lts->nFreeBlocks > 0)
+       long    *heap = lts->freeBlocks;
+       long     blocknum;
+       int              heapsize;
+       unsigned long pos;
+
+       /* freelist empty; allocate a new block */
+       if (lts->nFreeBlocks == 0)
+               return lts->nBlocksAllocated++;
+
+       if (lts->nFreeBlocks == 1)
        {
-               if (!lts->blocksSorted)
-               {
-                       qsort((void *) lts->freeBlocks, lts->nFreeBlocks,
-                                 sizeof(long), freeBlocks_cmp);
-                       lts->blocksSorted = true;
-               }
-               return lts->freeBlocks[--lts->nFreeBlocks];
+               lts->nFreeBlocks--;
+               return lts->freeBlocks[0];
        }
-       else
-               return lts->nBlocksAllocated++;
+
+       /* take top of minheap */
+       blocknum = heap[0];
+
+       /* replace with end of minheap array */
+       heap[0] = heap[--lts->nFreeBlocks];
+
+       /* sift down */
+       pos = 0;
+       heapsize = lts->nFreeBlocks;
+       while (true)
+       {
+               unsigned long left  = left_offset(pos);
+               unsigned long right = right_offset(pos);
+               unsigned long min_child;
+
+               if (left < heapsize && right < heapsize)
+                       min_child = (heap[left] < heap[right]) ? left : right;
+               else if (left < heapsize)
+                       min_child = left;
+               else if (right < heapsize)
+                       min_child = right;
+               else
+                       break;
+
+               if (heap[min_child] >= heap[pos])
+                       break;
+
+               swap_nodes(heap, min_child, pos);
+               pos = min_child;
+       }
+
+       return blocknum;
 }
 
 /*
@@ -369,7 +401,8 @@ ltsGetFreeBlock(LogicalTapeSet *lts)
 static void
 ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
 {
-       int                     ndx;
+       long    *heap;
+       unsigned long pos;
 
        /*
         * Do nothing if we're no longer interested in remembering free space.
@@ -382,19 +415,35 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
         */
        if (lts->nFreeBlocks >= lts->freeBlocksLen)
        {
+               /*
+                * If the freelist becomes very large, just return and leak this free
+                * block.
+                */
+               if (lts->freeBlocksLen * 2 > MaxAllocSize)
+                       return;
+
                lts->freeBlocksLen *= 2;
                lts->freeBlocks = (long *) repalloc(lts->freeBlocks,
                                                                                        lts->freeBlocksLen * sizeof(long));
        }
 
-       /*
-        * Add blocknum to array, and mark the array unsorted if it's no longer in
-        * decreasing order.
-        */
-       ndx = lts->nFreeBlocks++;
-       lts->freeBlocks[ndx] = blocknum;
-       if (ndx > 0 && lts->freeBlocks[ndx - 1] < blocknum)
-               lts->blocksSorted = false;
+       heap = lts->freeBlocks;
+       pos = lts->nFreeBlocks;
+
+       /* place entry at end of minheap array */
+       heap[pos] = blocknum;
+       lts->nFreeBlocks++;
+
+       /* sift up */
+       while (pos != 0)
+       {
+               unsigned long parent = parent_offset(pos);
+               if (heap[parent] < heap[pos])
+                       break;
+
+               swap_nodes(heap, parent, pos);
+               pos = parent;
+       }
 }
 
 /*
@@ -524,7 +573,6 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
        lts->nBlocksWritten = 0L;
        lts->nHoleBlocks = 0L;
        lts->forgetFreeSpace = false;
-       lts->blocksSorted = true;       /* a zero-length array is sorted ... */
        lts->freeBlocksLen = 32;        /* reasonable initial guess */
        lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
        lts->nFreeBlocks = 0;