// <memory_resource> implementation -*- C++ -*-

// Copyright (C) 2018-2019 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#include <memory_resource>
#include <algorithm>            // lower_bound, rotate
#include <atomic>
#include <bit>                  // __ceil2, __log2p1
#include <new>
#if ATOMIC_POINTER_LOCK_FREE != 2
# include <bits/std_mutex.h>    // std::mutex, std::lock_guard
# include <bits/move.h>         // std::exchange
#endif

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
namespace pmr
{
  namespace
  {
    class newdel_res_t final : public memory_resource
    {
      void*
      do_allocate(size_t __bytes, size_t __alignment) override
      { return ::operator new(__bytes, std::align_val_t(__alignment)); }

      void
      do_deallocate(void* __p, size_t __bytes, size_t __alignment) noexcept
      override
      { ::operator delete(__p, __bytes, std::align_val_t(__alignment)); }

      bool
      do_is_equal(const memory_resource& __other) const noexcept override
      { return &__other == this; }
    };

    class null_res_t final : public memory_resource
    {
      void*
      do_allocate(size_t, size_t) override
      { std::__throw_bad_alloc(); }

      void
      do_deallocate(void*, size_t, size_t) noexcept override
      { }

      bool
      do_is_equal(const memory_resource& __other) const noexcept override
      { return &__other == this; }
    };

    template<typename T>
      struct constant_init
      {
        union {
          unsigned char unused;
          T obj;
        };
        constexpr constant_init() : obj() { }

        template<typename U>
          explicit constexpr constant_init(U arg) : obj(arg) { }

        ~constant_init() { /* do nothing, union member is not destroyed */ }
      };
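
    // constant_init is the usual trick for immortal singletons: the union
    // member is constant-initialized, so the objects below are usable
    // before any dynamic initialization runs, and the do-nothing destructor
    // means they are never destroyed, so e.g. new_delete_resource() remains
    // safe to call while other static-storage objects are being destroyed.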

    constant_init<newdel_res_t> newdel_res{};
    constant_init<null_res_t> null_res{};

#if ATOMIC_POINTER_LOCK_FREE == 2
    using atomic_mem_res = atomic<memory_resource*>;
# define _GLIBCXX_ATOMIC_MEM_RES_CAN_BE_CONSTANT_INITIALIZED
#elif defined(_GLIBCXX_HAS_GTHREADS)
    // Can't use pointer-width atomics, define a type using a mutex instead:
    struct atomic_mem_res
    {
# ifdef __GTHREAD_MUTEX_INIT
# define _GLIBCXX_ATOMIC_MEM_RES_CAN_BE_CONSTANT_INITIALIZED
      // std::mutex has constexpr constructor
      constexpr
# endif
      atomic_mem_res(memory_resource* r) : val(r) { }

      mutex mx;
      memory_resource* val;

      memory_resource* load()
      {
        lock_guard<mutex> lock(mx);
        return val;
      }

      memory_resource* exchange(memory_resource* r)
      {
        lock_guard<mutex> lock(mx);
        return std::exchange(val, r);
      }
    };
#else
# define _GLIBCXX_ATOMIC_MEM_RES_CAN_BE_CONSTANT_INITIALIZED
    // Single-threaded, no need for synchronization
    struct atomic_mem_res
    {
      constexpr
      atomic_mem_res(memory_resource* r) : val(r) { }

      memory_resource* val;

      memory_resource* load() const
      {
        return val;
      }

      memory_resource* exchange(memory_resource* r)
      {
        return std::exchange(val, r);
      }
    };
#endif // ATOMIC_POINTER_LOCK_FREE == 2

#ifdef _GLIBCXX_ATOMIC_MEM_RES_CAN_BE_CONSTANT_INITIALIZED
    constant_init<atomic_mem_res> default_res{&newdel_res.obj};
#else
# include "default_resource.h"
#endif
  } // namespace

  memory_resource*
  new_delete_resource() noexcept
  { return &newdel_res.obj; }

  memory_resource*
  null_memory_resource() noexcept
  { return &null_res.obj; }

  memory_resource*
  set_default_resource(memory_resource* r) noexcept
  {
    if (r == nullptr)
      r = new_delete_resource();
    return default_res.obj.exchange(r);
  }

  memory_resource*
  get_default_resource() noexcept
  { return default_res.obj.load(); }
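
  // Example usage (illustrative only, not part of the implementation):
  // a polymorphic_allocator constructed without a memory_resource* uses
  // get_default_resource(), so swapping the default redirects every
  // default-constructed pmr container:
  //
  //   std::pmr::set_default_resource(std::pmr::null_memory_resource());
  //   std::pmr::vector<int> v; // allocator uses the null resource
  //   v.push_back(1);          // throws std::bad_alloc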

  // Member functions for std::pmr::monotonic_buffer_resource

  // Memory allocated by the upstream resource is managed in a linked list
  // of _Chunk objects. A _Chunk object recording the size and alignment of
  // the allocated block and a pointer to the previous chunk is placed
  // at the end of the block.
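  //
  // Worked example (illustrative, for a 64-bit target where
  // sizeof(_Chunk) == 11): a request for 1000 bytes allocates
  // __ceil2(1000 + 11) == 1024 bytes upstream; the _Chunk record occupies
  // the final 11 bytes and the caller can use the remaining 1013.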
  class monotonic_buffer_resource::_Chunk
  {
  public:
    // Return the address and size of a block of memory allocated from __r,
    // of at least __size bytes and aligned to __align.
    // Add a new _Chunk to the front of the linked list at __head.
    static pair<void*, size_t>
    allocate(memory_resource* __r, size_t __size, size_t __align,
             _Chunk*& __head)
    {
      __size = std::__ceil2(__size + sizeof(_Chunk));
      void* __p = __r->allocate(__size, __align);
      // Add a chunk defined by (__p, __size, __align) to linked list __head.
      void* const __back = (char*)__p + __size - sizeof(_Chunk);
      __head = ::new(__back) _Chunk(__size, __align, __head);
      return { __p, __size - sizeof(_Chunk) };
    }

    // Return every chunk in linked list __head to resource __r.
    static void
    release(_Chunk*& __head, memory_resource* __r) noexcept
    {
      _Chunk* __next = __head;
      __head = nullptr;
      while (__next)
        {
          _Chunk* __ch = __next;
          __builtin_memcpy(&__next, __ch->_M_next, sizeof(_Chunk*));

          __glibcxx_assert(__ch->_M_canary != 0);
          __glibcxx_assert(__ch->_M_canary == (__ch->_M_size|__ch->_M_align));

          if (__ch->_M_canary != (__ch->_M_size | __ch->_M_align))
            return; // buffer overflow detected!

          size_t __size = (1u << __ch->_M_size);
          size_t __align = (1u << __ch->_M_align);
          void* __start = (char*)(__ch + 1) - __size;
          __r->deallocate(__start, __size, __align);
        }
    }

  private:
    _Chunk(size_t __size, size_t __align, _Chunk* __next) noexcept
    : _M_size(std::__log2p1(__size) - 1),
      _M_align(std::__log2p1(__align) - 1)
    {
      __builtin_memcpy(_M_next, &__next, sizeof(__next));
      _M_canary = _M_size | _M_align;
    }

    unsigned char _M_canary;
    unsigned char _M_size;
    unsigned char _M_align;
    unsigned char _M_next[sizeof(_Chunk*)];
  };
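
  // Storing the next pointer as a byte array (read and written with
  // __builtin_memcpy) keeps every member an unsigned char, so
  // alignof(_Chunk) == 1 and the record can be placed at the end of a
  // block regardless of its address; _M_new_buffer asserts this below.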

  void
  monotonic_buffer_resource::_M_new_buffer(size_t bytes, size_t alignment)
  {
    // Need to check this somewhere, so put it here:
    static_assert(alignof(monotonic_buffer_resource::_Chunk) == 1);

    const size_t n = std::max(bytes, _M_next_bufsiz);
    const size_t m = std::max(alignment, alignof(std::max_align_t));
    auto [p, size] = _Chunk::allocate(_M_upstream, n, m, _M_head);
    _M_current_buf = p;
    _M_avail = size;
    _M_next_bufsiz *= _S_growth_factor;
  }

  void
  monotonic_buffer_resource::_M_release_buffers() noexcept
  {
    _Chunk::release(_M_head, _M_upstream);
  }
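
  // Example usage (illustrative only): a monotonic_buffer_resource frees
  // nothing until release() or destruction, trading memory for speed in
  // phase-oriented workloads:
  //
  //   char buf[1024];
  //   std::pmr::monotonic_buffer_resource mbr(buf, sizeof(buf));
  //   std::pmr::vector<int> v(&mbr); // draws from buf first,
  //   v.resize(10000);               // then from upstream via _M_new_buffer
  //   // ~monotonic_buffer_resource() calls _M_release_buffers()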

  // Helper types for synchronized_pool_resource & unsynchronized_pool_resource

  namespace {

  // Simple bitset with runtime size.
  // Tracks which blocks in a pool chunk are used/unused.
  struct bitset
  {
    using word = uint64_t;
    using size_type // unsigned integer type with no more than 32 bits
      = conditional_t<numeric_limits<size_t>::digits <= 32, size_t, uint32_t>;

    static constexpr unsigned bits_per_word = numeric_limits<word>::digits;

    // The bitset does not own p
    bitset(void* p, size_type num_blocks)
    : _M_words(static_cast<word*>(p)), _M_size(num_blocks),
      _M_next_word(0)
    {
      const size_type last_word = num_blocks / bits_per_word;
      __builtin_memset(_M_words, 0, last_word * sizeof(*_M_words));
      // Set bits beyond _M_size, so they are not treated as free blocks:
      if (const size_type extra_bits = num_blocks % bits_per_word)
        _M_words[last_word] = word(-1) << extra_bits;
      __glibcxx_assert( empty() );
      __glibcxx_assert( free() == num_blocks );
    }

    bitset() = default;
    ~bitset() = default;

    // Number of blocks
    size_type size() const noexcept { return _M_size; }

    // Number of free blocks (unset bits)
    size_type free() const noexcept
    {
      size_type n = 0;
      for (size_type i = _M_next_word; i < nwords(); ++i)
        n += (bits_per_word - std::__popcount(_M_words[i]));
      return n;
    }

    // True if there are no free blocks (all bits are set)
    bool full() const noexcept
    {
      if (_M_next_word >= nwords())
        return true;
      // For a bitset with size() > (max_blocks_per_chunk() - 64) we will
      // have nwords() == (max_word_index() + 1) and so _M_next_word will
      // never be equal to nwords().
      // In that case, check if the last word is full:
      if (_M_next_word == max_word_index())
        return _M_words[_M_next_word] == word(-1);
      return false;
    }

    // True if size() != 0 and all blocks are free (no bits are set).
    bool empty() const noexcept
    {
      if (nwords() == 0)
        return false;
      if (_M_next_word != 0)
        return false;
      for (size_type i = 0; i < nwords() - 1; ++i)
        if (_M_words[i] != 0)
          return false;
      word last = _M_words[nwords() - 1];
      if (const size_type extra_bits = size() % bits_per_word)
        last <<= (bits_per_word - extra_bits);
      return last == 0;
    }

    void reset() noexcept
    {
      _M_words = nullptr;
      _M_size = _M_next_word = 0;
    }

    bool operator[](size_type n) const noexcept
    {
      __glibcxx_assert( n < _M_size );
      const size_type wd = n / bits_per_word;
      const word bit = word(1) << (n % bits_per_word);
      return _M_words[wd] & bit;
    }

    size_type get_first_unset() noexcept
    {
      const size_type wd = _M_next_word;
      if (wd < nwords())
        {
          const size_type n = std::__countr_one(_M_words[wd]);
          if (n < bits_per_word)
            {
              const word bit = word(1) << n;
              _M_words[wd] |= bit;
              update_next_word();
              return (wd * bits_per_word) + n;
            }
        }
      return size_type(-1);
    }

    void set(size_type n) noexcept
    {
      __glibcxx_assert( n < _M_size );
      const size_type wd = n / bits_per_word;
      const word bit = word(1) << (n % bits_per_word);
      _M_words[wd] |= bit;
      if (wd == _M_next_word)
        update_next_word();
    }

    void clear(size_type n) noexcept
    {
      __glibcxx_assert( n < _M_size );
      const size_type wd = n / bits_per_word;
      const word bit = word(1) << (n % bits_per_word);
      _M_words[wd] &= ~bit;
      if (wd < _M_next_word)
        _M_next_word = wd;
    }

    // Update _M_next_word to refer to the next word with an unset bit.
    // The size of the _M_next_word bit-field means it cannot represent
    // the maximum possible nwords() value. To avoid wraparound to zero
    // this function saturates _M_next_word at max_word_index().
    void update_next_word() noexcept
    {
      size_type next = _M_next_word;
      while (_M_words[next] == word(-1) && ++next < nwords())
        { }
      _M_next_word = std::min(next, max_word_index());
    }

    void swap(bitset& b) noexcept
    {
      std::swap(_M_words, b._M_words);
      size_type tmp = _M_size;
      _M_size = b._M_size;
      b._M_size = tmp;
      tmp = _M_next_word;
      _M_next_word = b._M_next_word;
      b._M_next_word = tmp;
    }

    size_type nwords() const noexcept
    { return (_M_size + bits_per_word - 1) / bits_per_word; }

    // Maximum value that can be stored in bitset::_M_size member (approx 500k)
    static constexpr size_type max_blocks_per_chunk() noexcept
    { return (size_type(1) << _S_size_digits) - 1; }

    // Maximum value that can be stored in bitset::_M_next_word member (8191).
    static constexpr size_type max_word_index() noexcept
    { return (max_blocks_per_chunk() + bits_per_word - 1) / bits_per_word; }

    word* data() const noexcept { return _M_words; }

  private:
    static constexpr unsigned _S_size_digits
      = (numeric_limits<size_type>::digits
         + std::__log2p1(bits_per_word) - 1) / 2;

    word* _M_words = nullptr;
    // Number of blocks represented by the bitset:
    size_type _M_size : _S_size_digits;
    // Index of the first word with unset bits:
    size_type _M_next_word : numeric_limits<size_type>::digits - _S_size_digits;
  };
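
  // Worked numbers (illustrative, for a 64-bit target): size_type is
  // uint32_t (32 bits) and bits_per_word is 64, so _S_size_digits is
  // (32 + 7 - 1) / 2 == 19; _M_size can then count up to
  // 2^19 - 1 == 524287 blocks ("approx 500k" above), leaving 13 bits
  // for the _M_next_word bit-field.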

  // A "chunk" belonging to a pool.
  // A chunk contains many blocks of the same size.
  // Derived from bitset to reuse its tail-padding.
  struct chunk : bitset
  {
    chunk() = default;

    // p points to the start of a chunk of size bytes in length.
    // The chunk has space for n blocks, followed by a bitset of size n
    // that begins at address words.
    // This object does not own p or words, the caller will free it.
    chunk(void* p, uint32_t bytes, void* words, size_t n)
    : bitset(words, n),
      _M_bytes(bytes),
      _M_p(static_cast<std::byte*>(p))
    { __glibcxx_assert(bytes <= chunk::max_bytes_per_chunk()); }

    chunk(chunk&& c) noexcept
    : bitset(std::move(c)), _M_bytes(c._M_bytes), _M_p(c._M_p)
    {
      c._M_bytes = 0;
      c._M_p = nullptr;
      c.reset();
    }

    chunk& operator=(chunk&& c) noexcept
    {
      swap(c);
      return *this;
    }

    // Allocated size of chunk:
    uint32_t _M_bytes = 0;
    // Start of allocated chunk:
    std::byte* _M_p = nullptr;

    // True if there are no free blocks in this chunk
    using bitset::full;
    // Number of blocks in this chunk
    using bitset::size;

    static constexpr uint32_t max_bytes_per_chunk() noexcept
    { return numeric_limits<decltype(_M_bytes)>::max(); }

    // Determine if block with address p and size block_size
    // is contained within this chunk.
    bool owns(void* p, size_t block_size)
    {
      std::less_equal<uintptr_t> less_equal;
      return less_equal(reinterpret_cast<uintptr_t>(_M_p),
                        reinterpret_cast<uintptr_t>(p))
        && less_equal(reinterpret_cast<uintptr_t>(p) + block_size,
                      reinterpret_cast<uintptr_t>(bitset::data()));
    }

    // Allocate next available block of block_size bytes from this chunk.
    void* reserve(size_t block_size) noexcept
    {
      const size_type n = get_first_unset();
      if (n == size_type(-1))
        return nullptr;
      return _M_p + (n * block_size);
    }

    // Deallocate a single block of block_size bytes
    void release(void* vp, size_t block_size)
    {
      __glibcxx_assert( owns(vp, block_size) );
      const size_t offset = static_cast<std::byte*>(vp) - _M_p;
      // Pointer is correctly aligned for a block in this chunk:
      __glibcxx_assert( (offset % block_size) == 0 );
      // Block has been allocated:
      __glibcxx_assert( (*this)[offset / block_size] == true );
      bitset::clear(offset / block_size);
    }

    // Deallocate a single block if it belongs to this chunk.
    bool try_release(void* p, size_t block_size)
    {
      if (!owns(p, block_size))
        return false;
      release(p, block_size);
      return true;
    }

    void swap(chunk& c) noexcept
    {
      std::swap(_M_bytes, c._M_bytes);
      std::swap(_M_p, c._M_p);
      bitset::swap(c);
    }

    bool operator<(const chunk& c) const noexcept
    { return std::less<const void*>{}(_M_p, c._M_p); }

    friend void swap(chunk& l, chunk& r) { l.swap(r); }

    friend bool operator<(const void* p, const chunk& c) noexcept
    { return std::less<const void*>{}(p, c._M_p); }
  };

  // For 64-bit pointers this is the size of three pointers i.e. 24 bytes.
  // For 32-bit and 20-bit pointers it's four pointers (16 bytes).
  // For 16-bit pointers it's five pointers (10 bytes).
  // TODO pad 64-bit to 4*sizeof(void*) to avoid splitting across cache lines?
  static_assert(sizeof(chunk)
      == sizeof(bitset::size_type) + sizeof(uint32_t) + 2 * sizeof(void*));

  // An oversized allocation that doesn't fit in a pool.
  struct big_block
  {
    // Alignment must be a power-of-two so we only need to use enough bits
    // to store the power, not the actual value:
    static constexpr unsigned _S_alignbits
      = std::__log2p1((unsigned)numeric_limits<size_t>::digits - 1);
    // Use the remaining bits to store the size:
    static constexpr unsigned _S_sizebits
      = numeric_limits<size_t>::digits - _S_alignbits;
    // The maximum value that can be stored in _M_size
    static constexpr size_t all_ones = size_t(-1) >> _S_alignbits;
    // The minimum size of a big block (smaller sizes will be rounded up).
    static constexpr size_t min = 1u << _S_alignbits;

    big_block(size_t bytes, size_t alignment)
    : _M_size(alloc_size(bytes) >> _S_alignbits),
      _M_align_exp(std::__log2p1(alignment) - 1u)
    { }

    void* pointer = nullptr;
    size_t _M_size : numeric_limits<size_t>::digits - _S_alignbits;
    size_t _M_align_exp : _S_alignbits;

    size_t size() const noexcept
    {
      // If all bits are set in _M_size it means the maximum possible size:
      if (__builtin_expect(_M_size == (size_t(-1) >> _S_alignbits), false))
        return (size_t)-1;
      else
        return _M_size << _S_alignbits;
    }

    size_t align() const noexcept { return size_t(1) << _M_align_exp; }

    // Calculate size to be allocated instead of requested number of bytes.
    // The requested value will be rounded up to a multiple of big_block::min,
    // so the low _S_alignbits bits are all zero and don't need to be stored.
    static constexpr size_t alloc_size(size_t bytes) noexcept
    {
      const size_t s = bytes + min - 1u;
      if (__builtin_expect(s < bytes, false))
        return size_t(-1); // addition wrapped past zero, return max value
      else
        return s & ~(min - 1u);
    }

    friend bool operator<(void* p, const big_block& b) noexcept
    { return less<void*>{}(p, b.pointer); }

    friend bool operator<(const big_block& b, void* p) noexcept
    { return less<void*>{}(b.pointer, p); }
  };

  static_assert(sizeof(big_block) == (2 * sizeof(void*)));
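
  // Worked example (illustrative, for 64-bit size_t): _S_alignbits is
  // __log2p1(63) == 6, so big_block::min is 64. For big_block(1000, 32):
  // alloc_size(1000) rounds up to 1024, _M_size stores 1024 >> 6 == 16,
  // and _M_align_exp stores 5; size() recovers 16 << 6 == 1024 and
  // align() recovers 1 << 5 == 32.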

  } // namespace

  // A pool that serves blocks of a particular size.
  // Each pool manages a number of chunks.
  // When a pool is full it is replenished by allocating another chunk.
  struct __pool_resource::_Pool
  {
    // Smallest supported block size
    static constexpr unsigned _S_min_block
      = std::max(sizeof(void*), alignof(bitset::word));

    _Pool(size_t __block_size, size_t __blocks_per_chunk)
    : _M_chunks(),
      _M_block_sz(__block_size),
      _M_blocks_per_chunk(__blocks_per_chunk)
    { }

    // Must call release(r) before destruction!
    ~_Pool() { __glibcxx_assert(_M_chunks.empty()); }

    _Pool(_Pool&&) noexcept = default;
    _Pool& operator=(_Pool&&) noexcept = default;

    // Size of blocks in this pool
    size_t block_size() const noexcept
    { return _M_block_sz; }

    // Allocate a block if the pool is not full, otherwise return null.
    void* try_allocate() noexcept
    {
      const size_t blocksz = block_size();
      if (!_M_chunks.empty())
        {
          auto& last = _M_chunks.back();
          if (void* p = last.reserve(blocksz))
            return p;
          // TODO last is full, so move another chunk to the back instead?
          for (auto it = _M_chunks.begin(); it != &last; ++it)
            if (void* p = it->reserve(blocksz))
              return p;
        }
      return nullptr;
    }

    // Allocate a block from the pool, replenishing from upstream if needed.
    void* allocate(memory_resource* r, const pool_options& opts)
    {
      if (void* p = try_allocate())
        return p;
      replenish(r, opts);
      return _M_chunks.back().reserve(block_size());
    }

    // Return a block to the pool.
    bool deallocate(memory_resource*, void* p)
    {
      const size_t blocksz = block_size();
      if (__builtin_expect(!_M_chunks.empty(), true))
        {
          auto& last = _M_chunks.back();
          if (last.try_release(p, blocksz))
            return true;
          auto it = std::upper_bound(_M_chunks.begin(), &last, p);
          if (it != _M_chunks.begin())
            {
              it--;
              if (it->try_release(p, blocksz))
                // If chunk is empty could return to upstream, but we don't
                // currently do that. Pools only increase in size.
                return true;
            }
        }
      return false;
    }

    void replenish(memory_resource* __r, const pool_options& __opts)
    {
      using word = chunk::word;
      const size_t __blocks = _M_blocks_per_chunk;
      const auto __bits = chunk::bits_per_word;
      const size_t __words = (__blocks + __bits - 1) / __bits;
      const size_t __block_size = block_size();
      size_t __bytes = __blocks * __block_size + __words * sizeof(word);
      size_t __alignment = std::__ceil2(__block_size);
      void* __p = __r->allocate(__bytes, __alignment);
      __try
        {
          size_t __n = __blocks * __block_size;
          void* __pwords = static_cast<char*>(__p) + __n;
          _M_chunks.insert(chunk(__p, __bytes, __pwords, __blocks), __r);
        }
      __catch (...)
        {
          __r->deallocate(__p, __bytes, __alignment);
        }
      if (_M_blocks_per_chunk < __opts.max_blocks_per_chunk)
        {
          const size_t max_blocks
            = (chunk::max_bytes_per_chunk() - sizeof(word))
            / (__block_size + 0.125);
          _M_blocks_per_chunk = std::min({
              max_blocks,
              __opts.max_blocks_per_chunk,
              (size_t)_M_blocks_per_chunk * 2
          });
        }
    }

    void release(memory_resource* __r)
    {
      const size_t __alignment = std::__ceil2(block_size());
      for (auto& __c : _M_chunks)
        if (__c._M_p)
          __r->deallocate(__c._M_p, __c._M_bytes, __alignment);
      _M_chunks.clear(__r);
    }

    // A "resourceless vector" instead of pmr::vector, to save space.
    // All resize operations need to be passed a memory resource, which
    // obviously needs to be the same one every time.
    // Chunks are kept sorted by address of their first block, except for
    // the most recently-allocated Chunk which is at the end of the vector.
    struct vector
    {
      using value_type = chunk;
      using size_type = unsigned;
      using iterator = value_type*;

      // A vector owns its data pointer but not memory held by its elements.
      chunk* data = nullptr;
      size_type size = 0;
      size_type capacity = 0;

      vector() = default;

      vector(size_type __n, memory_resource* __r)
      : data(polymorphic_allocator<value_type>(__r).allocate(__n)),
        capacity(__n)
      { }

      // Must call clear(r) before destruction!
      ~vector() { __glibcxx_assert(data == nullptr); }

      vector(vector&& __rval) noexcept
      : data(__rval.data), size(__rval.size), capacity(__rval.capacity)
      {
        __rval.data = nullptr;
        __rval.capacity = __rval.size = 0;
      }

      vector& operator=(vector&& __rval) noexcept
      {
        __glibcxx_assert(data == nullptr);
        data = __rval.data;
        size = __rval.size;
        capacity = __rval.capacity;
        __rval.data = nullptr;
        __rval.capacity = __rval.size = 0;
        return *this;
      }

      // void resize(size_type __n, memory_resource* __r);
      // void reserve(size_type __n, memory_resource* __r);

      void clear(memory_resource* __r)
      {
        if (!data)
          return;
        // Chunks must be individually freed before clearing the vector.
        std::destroy(begin(), end());
        polymorphic_allocator<value_type>(__r).deallocate(data, capacity);
        data = nullptr;
        capacity = size = 0;
      }

      // Sort existing elements then insert new one at the end.
      iterator insert(chunk&& c, memory_resource* r)
      {
        if (size < capacity)
          {
            if (size > 1)
              {
                auto mid = end() - 1;
                std::rotate(std::lower_bound(begin(), mid, *mid), mid, end());
              }
          }
        else if (size > 0)
          {
            polymorphic_allocator<value_type> __alloc(r);
            auto __mid = std::lower_bound(begin(), end() - 1, back());
            auto __p = __alloc.allocate(capacity * 1.5);
            // move [begin,__mid) to new storage
            auto __p2 = std::move(begin(), __mid, __p);
            // move end-1 to new storage
            *__p2 = std::move(back());
            // move [__mid,end-1) to new storage
            std::move(__mid, end() - 1, ++__p2);
            std::destroy(begin(), end());
            __alloc.deallocate(data, capacity);
            data = __p;
            capacity *= 1.5;
          }
        else
          {
            polymorphic_allocator<value_type> __alloc(r);
            data = __alloc.allocate(capacity = 8);
          }
        auto back = ::new (data + size) chunk(std::move(c));
        __glibcxx_assert(std::is_sorted(begin(), back));
        ++size;
        return back;
      }

      iterator begin() const { return data; }
      iterator end() const { return data + size; }

      bool empty() const noexcept { return size == 0; }

      value_type& back() { return data[size - 1]; }
    };

    vector _M_chunks;
    unsigned _M_block_sz;             // size of blocks allocated from this pool
    unsigned _M_blocks_per_chunk;     // number of blocks to allocate next
  };

  // An oversized allocation that doesn't fit in a pool.
  struct __pool_resource::_BigBlock : big_block
  {
    using big_block::big_block;
  };

  namespace {

  constexpr size_t pool_sizes[] = {
      8, 16, 24,
      32, 48,
      64, 80, 96, 112,
      128, 192,
      256, 320, 384, 448,
      512, 768,
#if __SIZE_WIDTH__ > 16
      1024, 1536,
      2048, 3072,
#if __SIZE_WIDTH__ > 20
      1<<12, 1<<13, 1<<14,
      1<<15, 1<<16, 1<<17,
      1<<20, 1<<21, 1<<22 // 4MB should be enough for anybody
#endif
#endif
  };

  pool_options
  munge_options(pool_options opts)
  {
    // The values in the returned struct may differ from those supplied
    // to the pool resource constructor in that values of zero will be
    // replaced with implementation-defined defaults, and sizes may be
    // rounded to unspecified granularity.

    // max_blocks_per_chunk sets the absolute maximum for the pool resource.
    // Each pool might have a smaller maximum, because pools for very large
    // objects might impose a smaller limit.
    if (opts.max_blocks_per_chunk == 0)
      {
        // Pick a default that depends on the number of bits in size_t.
        opts.max_blocks_per_chunk = __SIZE_WIDTH__ << 8;
      }
    else
      {
        // TODO round to preferred granularity ?
      }

    if (opts.max_blocks_per_chunk > chunk::max_blocks_per_chunk())
      {
        opts.max_blocks_per_chunk = chunk::max_blocks_per_chunk();
      }

    // largest_required_pool_block specifies the largest block size that will
    // be allocated from a pool. Larger allocations will come directly from
    // the upstream resource and so will not be pooled.
    if (opts.largest_required_pool_block == 0)
      {
        // Pick a sensible default that depends on the number of bits in size_t
        // (pools with larger block sizes must be explicitly requested by
        // using a non-zero value for largest_required_pool_block).
        opts.largest_required_pool_block = __SIZE_WIDTH__ << 6;
      }
    else
      {
        // Round to preferred granularity
        static_assert(std::__ispow2(pool_sizes[0]));
        constexpr size_t mask = pool_sizes[0] - 1;
        opts.largest_required_pool_block += mask;
        opts.largest_required_pool_block &= ~mask;
      }

    if (opts.largest_required_pool_block < big_block::min)
      {
        opts.largest_required_pool_block = big_block::min;
      }
    else if (opts.largest_required_pool_block > std::end(pool_sizes)[-1])
      {
        // Setting _M_opts to the largest pool allows users to query it:
        opts.largest_required_pool_block = std::end(pool_sizes)[-1];
      }
    return opts;
  }
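
  // Worked defaults (illustrative, for 64-bit size_t): a zero
  // max_blocks_per_chunk becomes 64 << 8 == 16384, and a zero
  // largest_required_pool_block becomes 64 << 6 == 4096 bytes.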

  inline int
  pool_index(size_t block_size, int npools)
  {
    auto p = std::lower_bound(pool_sizes, pool_sizes + npools, block_size);
    int n = p - pool_sizes;
    if (n != npools)
      return n;
    return -1;
  }
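
  // For example (illustrative): pool_index(100, npools) finds the first
  // pool size not less than 100, i.e. the 112-byte pool; a block size
  // larger than every pool size yields -1 and is handled unpooled.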

  inline int
  select_num_pools(const pool_options& opts)
  {
    auto p = std::lower_bound(std::begin(pool_sizes), std::end(pool_sizes),
                              opts.largest_required_pool_block);
    const int n = p - std::begin(pool_sizes);
    if (p == std::end(pool_sizes))
      return n;
    return n + 1;
  }

#ifdef _GLIBCXX_HAS_GTHREADS
  using shared_lock = std::shared_lock<shared_mutex>;
  using exclusive_lock = lock_guard<shared_mutex>;
#endif

  } // namespace

  __pool_resource::
  __pool_resource(const pool_options& opts, memory_resource* upstream)
  : _M_opts(munge_options(opts)), _M_unpooled(upstream),
    _M_npools(select_num_pools(_M_opts))
  { }

  __pool_resource::~__pool_resource() { release(); }

  void
  __pool_resource::release() noexcept
  {
    memory_resource* res = resource();
    // deallocate oversize allocations
    for (auto& b : _M_unpooled)
      res->deallocate(b.pointer, b.size(), b.align());
    pmr::vector<_BigBlock>{res}.swap(_M_unpooled);
  }

  void*
  __pool_resource::allocate(size_t bytes, size_t alignment)
  {
    auto& b = _M_unpooled.emplace_back(bytes, alignment);
    __try {
      // N.B. need to allocate b.size(), which might be larger than bytes.
      void* p = resource()->allocate(b.size(), alignment);
      b.pointer = p;
      if (_M_unpooled.size() > 1)
        {
          const auto mid = _M_unpooled.end() - 1;
          // move to right position in vector
          std::rotate(std::lower_bound(_M_unpooled.begin(), mid, p),
                      mid, _M_unpooled.end());
        }
      return p;
    } __catch(...) {
      _M_unpooled.pop_back();
      __throw_exception_again;
    }
  }

  void
  __pool_resource::deallocate(void* p, size_t bytes [[maybe_unused]],
                              size_t alignment [[maybe_unused]])
  {
    const auto it
      = std::lower_bound(_M_unpooled.begin(), _M_unpooled.end(), p);
    __glibcxx_assert(it != _M_unpooled.end() && it->pointer == p);
    if (it != _M_unpooled.end() && it->pointer == p) // [[likely]]
      {
        const auto b = *it;
        __glibcxx_assert(b.size() == b.alloc_size(bytes));
        __glibcxx_assert(b.align() == alignment);
        _M_unpooled.erase(it);
        // N.B. need to deallocate b.size(), which might be larger than bytes.
        resource()->deallocate(p, b.size(), b.align());
      }
  }

  // Create array of pools, allocated from upstream resource.
  auto
  __pool_resource::_M_alloc_pools()
  -> _Pool*
  {
    polymorphic_allocator<_Pool> alloc{resource()};
    _Pool* p = alloc.allocate(_M_npools);
    for (int i = 0; i < _M_npools; ++i)
      {
        // For last pool use largest_required_pool_block
        const size_t block_size = (i+1 == _M_npools)
          ? _M_opts.largest_required_pool_block
          : pool_sizes[i];

        // Decide on initial number of blocks per chunk.
        // Always have at least 16 blocks per chunk:
        const size_t min_blocks_per_chunk = 16;
        // But for smaller blocks, use a larger initial size:
        size_t blocks_per_chunk
          = std::max(1024 / block_size, min_blocks_per_chunk);
        // But don't exceed the requested max_blocks_per_chunk:
        blocks_per_chunk
          = std::min(blocks_per_chunk, _M_opts.max_blocks_per_chunk);
        // Allow space for bitset to track which blocks are used/unused:
        blocks_per_chunk *= 1 - 1.0 / (__CHAR_BIT__ * block_size);
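        // (Illustrative: for 8-byte blocks, 128 * (1 - 1.0/64) == 126,
        // since each block also needs one bit of the chunk's bitset.)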
        // Construct a _Pool for the given block size and initial chunk size:
        alloc.construct(p + i, block_size, blocks_per_chunk);
      }
    return p;
  }

#ifdef _GLIBCXX_HAS_GTHREADS
  // synchronized_pool_resource members.

  /* Notes on implementation and thread safety:
   *
   * Each synchronized_pool_resource manages a linked list of N+1 _TPools
   * objects, where N is the number of threads using the pool resource.
   * Each _TPools object has its own set of pools, with their own chunks.
   * The first element of the list, _M_tpools[0], can be used by any thread.
   * The rest of the list contains a _TPools object for each thread,
   * accessed via the thread-specific key _M_key (and referred to for
   * exposition as _M_tpools[_M_key]).
   * The first element, _M_tpools[0], contains "orphaned chunks" which were
   * allocated by a thread which has since exited, and so there is no
   * _M_tpools[_M_key] for that thread.
   * A thread can access its own thread-specific set of pools via _M_key
   * while holding a shared lock on _M_mx. Accessing _M_impl._M_unpooled
   * or _M_tpools[0] or any other thread's _M_tpools[_M_key] requires an
   * exclusive lock.
   * The upstream_resource() pointer can be obtained without a lock, but
   * any dereference of that pointer requires an exclusive lock.
   * The _M_impl._M_opts and _M_impl._M_npools members are immutable,
   * and can safely be accessed concurrently.
   */

  extern "C" {
    static void destroy_TPools(void*);
  }

  struct synchronized_pool_resource::_TPools
  {
    // Exclusive lock must be held in the thread where this constructor runs.
    explicit
    _TPools(synchronized_pool_resource& owner, exclusive_lock&)
    : owner(owner), pools(owner._M_impl._M_alloc_pools())
    {
      // __builtin_printf("%p constructing\n", this);
      __glibcxx_assert(pools);
    }

    // Exclusive lock must be held in the thread where this destructor runs.
    ~_TPools()
    {
      __glibcxx_assert(pools);
      if (pools)
        {
          memory_resource* r = owner.upstream_resource();
          for (int i = 0; i < owner._M_impl._M_npools; ++i)
            pools[i].release(r);
          std::destroy_n(pools, owner._M_impl._M_npools);
          polymorphic_allocator<__pool_resource::_Pool> a(r);
          a.deallocate(pools, owner._M_impl._M_npools);
        }
      if (prev)
        prev->next = next;
      if (next)
        next->prev = prev;
    }

    // Exclusive lock must be held in the thread where this function runs.
    void move_nonempty_chunks()
    {
      __glibcxx_assert(pools);
      if (!pools)
        return;
      memory_resource* r = owner.upstream_resource();
      // move all non-empty chunks to the shared _TPools
      for (int i = 0; i < owner._M_impl._M_npools; ++i)
        for (auto& c : pools[i]._M_chunks)
          if (!c.empty())
            owner._M_tpools->pools[i]._M_chunks.insert(std::move(c), r);
    }

    synchronized_pool_resource& owner;
    __pool_resource::_Pool* pools = nullptr;
    _TPools* prev = nullptr;
    _TPools* next = nullptr;

    static void destroy(_TPools* p)
    {
      exclusive_lock l(p->owner._M_mx);
      // __glibcxx_assert(p != p->owner._M_tpools);
      p->move_nonempty_chunks();
      polymorphic_allocator<_TPools> a(p->owner.upstream_resource());
      p->~_TPools();
      a.deallocate(p, 1);
    }
  };

  // Called when a thread exits
  extern "C" {
    static void destroy_TPools(void* p)
    {
      using _TPools = synchronized_pool_resource::_TPools;
      _TPools::destroy(static_cast<_TPools*>(p));
    }
  }

  // Constructor
  synchronized_pool_resource::
  synchronized_pool_resource(const pool_options& opts,
                             memory_resource* upstream)
  : _M_impl(opts, upstream)
  {
    if (int err = __gthread_key_create(&_M_key, destroy_TPools))
      __throw_system_error(err);
    exclusive_lock l(_M_mx);
    _M_tpools = _M_alloc_shared_tpools(l);
  }

  // Destructor
  synchronized_pool_resource::~synchronized_pool_resource()
  {
    release();
    __gthread_key_delete(_M_key); // does not run destroy_TPools
  }

  void
  synchronized_pool_resource::release()
  {
    exclusive_lock l(_M_mx);
    if (_M_tpools)
      {
        __gthread_key_delete(_M_key); // does not run destroy_TPools
        __gthread_key_create(&_M_key, destroy_TPools);
        polymorphic_allocator<_TPools> a(upstream_resource());
        // destroy+deallocate each _TPools
        do
          {
            _TPools* p = _M_tpools;
            _M_tpools = _M_tpools->next;
            p->~_TPools();
            a.deallocate(p, 1);
          }
        while (_M_tpools);
      }
    // release unpooled memory
    _M_impl.release();
  }

  // Caller must hold shared or exclusive lock to ensure the pointer
  // isn't invalidated before it can be used.
  auto
  synchronized_pool_resource::_M_thread_specific_pools() noexcept
  {
    __pool_resource::_Pool* pools = nullptr;
    if (auto tp = static_cast<_TPools*>(__gthread_getspecific(_M_key)))
      {
        pools = tp->pools;
        __glibcxx_assert(tp->pools);
      }
    return pools;
  }

  // Override for memory_resource::do_allocate
  void*
  synchronized_pool_resource::
  do_allocate(size_t bytes, size_t alignment)
  {
    const auto block_size = std::max(bytes, alignment);
    if (block_size <= _M_impl._M_opts.largest_required_pool_block)
      {
        const ptrdiff_t index = pool_index(block_size, _M_impl._M_npools);
        memory_resource* const r = upstream_resource();
        const pool_options opts = _M_impl._M_opts;
        {
          // Try to allocate from the thread-specific pool
          shared_lock l(_M_mx);
          if (auto pools = _M_thread_specific_pools()) // [[likely]]
            {
              // Need exclusive lock to replenish so use try_allocate:
              if (void* p = pools[index].try_allocate())
                return p;
              // Need to take exclusive lock and replenish pool.
            }
          // Need to allocate or replenish thread-specific pools using
          // upstream resource, so need to hold exclusive lock.
        }
        // N.B. Another thread could call release() now lock is not held.
        exclusive_lock excl(_M_mx);
        if (!_M_tpools) // [[unlikely]]
          _M_tpools = _M_alloc_shared_tpools(excl);
        auto pools = _M_thread_specific_pools();
        if (!pools)
          pools = _M_alloc_tpools(excl)->pools;
        return pools[index].allocate(r, opts);
      }
    exclusive_lock l(_M_mx);
    return _M_impl.allocate(bytes, alignment); // unpooled allocation
  }

  // Override for memory_resource::do_deallocate
  void
  synchronized_pool_resource::
  do_deallocate(void* p, size_t bytes, size_t alignment)
  {
    size_t block_size = std::max(bytes, alignment);
    if (block_size <= _M_impl._M_opts.largest_required_pool_block)
      {
        const ptrdiff_t index = pool_index(block_size, _M_impl._M_npools);
        __glibcxx_assert(index != -1);
        {
          shared_lock l(_M_mx);
          auto pools = _M_thread_specific_pools();
          if (pools)
            {
              // No need to lock here, no other thread is accessing this pool.
              if (pools[index].deallocate(upstream_resource(), p))
                return;
            }
          // Block might have come from a different thread's pool,
          // take exclusive lock and check every pool.
        }
        // TODO store {p, bytes, alignment} somewhere and defer returning
        // the block to the correct thread-specific pool until we next
        // take the exclusive lock.
        exclusive_lock excl(_M_mx);
        for (_TPools* t = _M_tpools; t != nullptr; t = t->next)
          {
            if (t->pools) // [[likely]]
              {
                if (t->pools[index].deallocate(upstream_resource(), p))
                  return;
              }
          }
      }
    exclusive_lock l(_M_mx);
    _M_impl.deallocate(p, bytes, alignment);
  }

  // Allocate a thread-specific _TPools object and add it to the linked list.
  auto
  synchronized_pool_resource::_M_alloc_tpools(exclusive_lock& l)
  -> _TPools*
  {
    __glibcxx_assert(_M_tpools != nullptr);
    // dump_list(_M_tpools);
    polymorphic_allocator<_TPools> a(upstream_resource());
    _TPools* p = a.allocate(1);
    bool constructed = false;
    __try
      {
        a.construct(p, *this, l);
        constructed = true;
        // __glibcxx_assert(__gthread_getspecific(_M_key) == nullptr);
        if (int err = __gthread_setspecific(_M_key, p))
          __throw_system_error(err);
      }
    __catch(...)
      {
        if (constructed)
          a.destroy(p);
        a.deallocate(p, 1);
        __throw_exception_again;
      }
    p->prev = _M_tpools;
    p->next = _M_tpools->next;
    _M_tpools->next = p;
    if (p->next)
      p->next->prev = p;
    return p;
  }

  // Allocate the shared _TPools object, _M_tpools[0]
  auto
  synchronized_pool_resource::_M_alloc_shared_tpools(exclusive_lock& l)
  -> _TPools*
  {
    __glibcxx_assert(_M_tpools == nullptr);
    polymorphic_allocator<_TPools> a(upstream_resource());
    _TPools* p = a.allocate(1);
    __try
      {
        a.construct(p, *this, l);
      }
    __catch(...)
      {
        a.deallocate(p, 1);
        __throw_exception_again;
      }
    // __glibcxx_assert(p->next == nullptr);
    // __glibcxx_assert(p->prev == nullptr);
    return p;
  }
#endif // _GLIBCXX_HAS_GTHREADS

  // unsynchronized_pool_resource member functions

  // Constructor
  unsynchronized_pool_resource::
  unsynchronized_pool_resource(const pool_options& opts,
                               memory_resource* upstream)
  : _M_impl(opts, upstream), _M_pools(_M_impl._M_alloc_pools())
  { }

  // Destructor
  unsynchronized_pool_resource::~unsynchronized_pool_resource()
  { release(); }

  // Return all memory to upstream resource.
  void
  unsynchronized_pool_resource::release()
  {
    // release pooled memory
    if (_M_pools)
      {
        memory_resource* res = upstream_resource();
        polymorphic_allocator<_Pool> alloc{res};
        for (int i = 0; i < _M_impl._M_npools; ++i)
          {
            _M_pools[i].release(res);
            alloc.destroy(_M_pools + i);
          }
        alloc.deallocate(_M_pools, _M_impl._M_npools);
        _M_pools = nullptr;
      }

    // release unpooled memory
    _M_impl.release();
  }

  // Find the right pool for a block of size block_size.
  auto
  unsynchronized_pool_resource::_M_find_pool(size_t block_size) noexcept
  {
    __pool_resource::_Pool* pool = nullptr;
    if (_M_pools) // [[likely]]
      {
        int index = pool_index(block_size, _M_impl._M_npools);
        if (index != -1)
          pool = _M_pools + index;
      }
    return pool;
  }

  // Override for memory_resource::do_allocate
  void*
  unsynchronized_pool_resource::do_allocate(size_t bytes, size_t alignment)
  {
    const auto block_size = std::max(bytes, alignment);
    if (block_size <= _M_impl._M_opts.largest_required_pool_block)
      {
        // Recreate pools if release() has been called:
        if (__builtin_expect(_M_pools == nullptr, false))
          _M_pools = _M_impl._M_alloc_pools();
        if (auto pool = _M_find_pool(block_size))
          return pool->allocate(upstream_resource(), _M_impl._M_opts);
      }
    return _M_impl.allocate(bytes, alignment);
  }

  // Override for memory_resource::do_deallocate
  void
  unsynchronized_pool_resource::
  do_deallocate(void* p, size_t bytes, size_t alignment)
  {
    size_t block_size = std::max(bytes, alignment);
    if (block_size <= _M_impl._M_opts.largest_required_pool_block)
      {
        if (auto pool = _M_find_pool(block_size))
          {
            pool->deallocate(upstream_resource(), p);
            return;
          }
      }
    _M_impl.deallocate(p, bytes, alignment);
  }

} // namespace pmr
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std